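Fix bug http://archives.postgresql.org/pgsql-bugs/2006-10/msg00258.php: lowerstr() now returns a newly allocated lowercased copy instead of modifying its argument in place, and its callers are adjusted accordingly.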

author Teodor Sigaev <teodor@sigaev.ru>

Mon, 20 Nov 2006 14:03:30 +0000 (14:03 +0000)

committer Teodor Sigaev <teodor@sigaev.ru>

Mon, 20 Nov 2006 14:03:30 +0000 (14:03 +0000)
author Teodor Sigaev <teodor@sigaev.ru>
Mon, 20 Nov 2006 14:03:30 +0000 (14:03 +0000)
committer Teodor Sigaev <teodor@sigaev.ru>
Mon, 20 Nov 2006 14:03:30 +0000 (14:03 +0000)
diff --git a/contrib/tsearch2/dict_ex.c b/contrib/tsearch2/dict_ex.c

index ccb7f3fcbe30be70003b274d66b65e1ae04e9643..2fd5cbb7009393aa98bb74391dacbe06b46f6514 100644 (file)
--- a/contrib/tsearch2/dict_ex.c
+++ b/contrib/tsearch2/dict_ex.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict_ex.c,v 1.8 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict_ex.c,v 1.9 2006/11/20 14:03:30 teodor Exp $ */
  
  /*
   * example of dictionary
@@ -52,9 +52,11 @@ dex_lexize(PG_FUNCTION_ARGS)
  {
         DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
         char       *in = (char *) PG_GETARG_POINTER(1);
-       char       *txt = pnstrdup(in, PG_GETARG_INT32(2));
+       char       *utxt = pnstrdup(in, PG_GETARG_INT32(2));
         TSLexeme   *res = palloc(sizeof(TSLexeme) * 2);
+       char       *txt = lowerstr(utxt);
  
+       pfree(utxt);
         memset(res, 0, sizeof(TSLexeme) * 2);
  
         if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
diff --git a/contrib/tsearch2/dict_snowball.c b/contrib/tsearch2/dict_snowball.c

index f983ae8e13b2951c317bd14644ce2d34ff5cde1a..666774482490337ce1233f3998318492e0e303ae 100644 (file)
--- a/contrib/tsearch2/dict_snowball.c
+++ b/contrib/tsearch2/dict_snowball.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict_snowball.c,v 1.12 2006/07/11 16:35:31 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict_snowball.c,v 1.13 2006/11/20 14:03:30 teodor Exp $ */
  
  /*
   * example of Snowball dictionary
@@ -142,9 +142,11 @@ snb_lexize(PG_FUNCTION_ARGS)
  {
         DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
         char       *in = (char *) PG_GETARG_POINTER(1);
-       char       *txt = pnstrdup(in, PG_GETARG_INT32(2));
+       char       *utxt = pnstrdup(in, PG_GETARG_INT32(2));
         TSLexeme   *res = palloc(sizeof(TSLexeme) * 2);
-
+       char       *txt = lowerstr(utxt);
+       
+       pfree(utxt);
         memset(res, 0, sizeof(TSLexeme) * 2);
         if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
         {
diff --git a/contrib/tsearch2/dict_syn.c b/contrib/tsearch2/dict_syn.c

index d19686d63e0a2305505729d6178403dd635fbdd0..cddbd473508285a0f138fa732732b7b2bfe51785 100644 (file)
--- a/contrib/tsearch2/dict_syn.c
+++ b/contrib/tsearch2/dict_syn.c
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/contrib/tsearch2/dict_syn.c,v 1.9 2006/03/11 04:38:30 momjian Exp $ */
+/* $PostgreSQL: pgsql/contrib/tsearch2/dict_syn.c,v 1.10 2006/11/20 14:03:30 teodor Exp $ */
  
  /*
   * ISpell interface
@@ -132,8 +132,8 @@ syn_init(PG_FUNCTION_ARGS)
                         continue;
                 *end = '\0';
  
-               d->syn[cur].in = strdup(lowerstr(starti));
-               d->syn[cur].out = strdup(lowerstr(starto));
+               d->syn[cur].in = lowerstr(starti);
+               d->syn[cur].out = lowerstr(starto);
                 if (!(d->syn[cur].in && d->syn[cur].out))
                 {
                         fclose(fin);
@@ -163,12 +163,15 @@ syn_lexize(PG_FUNCTION_ARGS)
         Syn                     key,
                            *found;
         TSLexeme   *res = NULL;
+       char       *wrd;
  
         if (!PG_GETARG_INT32(2))
                 PG_RETURN_POINTER(NULL);
  
         key.out = NULL;
-       key.in = lowerstr(pnstrdup(in, PG_GETARG_INT32(2)));
+       wrd = pnstrdup(in, PG_GETARG_INT32(2));
+       key.in = lowerstr(wrd);
+       pfree(wrd);
  
         found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
         pfree(key.in);
diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c

index 9e4d689cd41167ca5ddc7271ae1b074efe868a13..6eedc7f3426d2508b93f20a63c917f2264de4ee4 100644 (file)
--- a/contrib/tsearch2/ispell/spell.c
+++ b/contrib/tsearch2/ispell/spell.c
@@ -147,7 +147,7 @@ NIAddSpell(IspellDict * Conf, const char *word, const char *flag)
  int
  NIImportDictionary(IspellDict * Conf, const char *filename)
  {
-       char            str[BUFSIZ];
+       char            str[BUFSIZ], *pstr;
         FILE       *dict;
  
         if (!(dict = fopen(filename, "r")))
@@ -190,9 +190,10 @@ NIImportDictionary(IspellDict * Conf, const char *filename)
                         }
                         s += pg_mblen(s);
                 }
-               lowerstr(str);
+               pstr = lowerstr(str);
  
-               NIAddSpell(Conf, str, flag);
+               NIAddSpell(Conf, pstr, flag);
+               pfree(pstr);
         }
         fclose(dict);
         return (0);
@@ -418,8 +419,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl, int line)
  int
  NIImportAffixes(IspellDict * Conf, const char *filename)
  {
-       char            str[BUFSIZ];
-       char            tmpstr[BUFSIZ];
+       char            str[BUFSIZ], *pstr = NULL;
         char            mask[BUFSIZ];
         char            find[BUFSIZ];
         char            repl[BUFSIZ];
@@ -439,11 +439,14 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
         while (fgets(str, sizeof(str), affix))
         {
                 line++;
+               if ( *str == '#' || *str == '\n' )
+                       continue;
+
                 pg_verifymbstr(str, strlen(str), false);
-               memcpy(tmpstr, str, 32);        /* compoundwords... */
-               tmpstr[32] = '\0';
-               lowerstr(tmpstr);
-               if (STRNCMP(tmpstr, "compoundwords") == 0)
+               if ( pstr )
+                       pfree( pstr );
+               pstr = lowerstr(str);
+               if (STRNCMP(pstr, "compoundwords") == 0)
                 {
                         s = findchar(str, 'l');
                         if (s)
@@ -458,21 +461,21 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
                                 continue;
                         }
                 }
-               if (STRNCMP(tmpstr, "suffixes") == 0)
+               if (STRNCMP(pstr, "suffixes") == 0)
                 {
                         suffixes = 1;
                         prefixes = 0;
                         oldformat++;
                         continue;
                 }
-               if (STRNCMP(tmpstr, "prefixes") == 0)
+               if (STRNCMP(pstr, "prefixes") == 0)
                 {
                         suffixes = 0;
                         prefixes = 1;
                         oldformat++;
                         continue;
                 }
-               if (STRNCMP(tmpstr, "flag") == 0)
+               if (STRNCMP(pstr, "flag") == 0)
                 {
                         s = str + 4;
                         flagflags = 0;
@@ -523,14 +526,16 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
                 if ((!suffixes) && (!prefixes))
                         continue;
  
-               lowerstr(str);
-               if (!parse_affentry(str, mask, find, repl, line))
+               if (!parse_affentry(pstr, mask, find, repl, line)) 
                         continue;
  
                 NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
         }
         fclose(affix);
  
+       if ( pstr )
+               pfree( pstr );
+
         return (0);
  }
  
@@ -538,11 +543,11 @@ int
  NIImportOOAffixes(IspellDict * Conf, const char *filename)
  {
         char            str[BUFSIZ];
-       char            type[BUFSIZ];
+       char            type[BUFSIZ], *ptype = NULL;
         char            sflag[BUFSIZ];
-       char            mask[BUFSIZ];
-       char            find[BUFSIZ];
-       char            repl[BUFSIZ];
+       char            mask[BUFSIZ], *pmask;
+       char            find[BUFSIZ], *pfind;
+       char            repl[BUFSIZ], *prepl;
         bool            isSuffix = false;
         int                     flag = 0;
         char            flagflags = 0;
@@ -577,8 +582,10 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
  
                 scanread = sscanf(str, scanbuf, type, sflag, find, repl, mask);
  
-               lowerstr(type);
-               if (scanread < 4 || (STRNCMP(type, "sfx") && STRNCMP(type, "pfx")))
+               if (ptype)
+                       pfree(ptype);
+               ptype = lowerstr(type);
+               if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
                         continue;
  
                 if (scanread == 4)
@@ -586,29 +593,35 @@ NIImportOOAffixes(IspellDict * Conf, const char *filename)
                         if (strlen(sflag) != 1)
                                 continue;
                         flag = *sflag;
-                       isSuffix = (STRNCMP(type, "sfx") == 0) ? true : false;
-                       lowerstr(find);
+                       isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false;
+                       pfind = lowerstr(find);
                         if (t_iseq(find, 'y'))
                                 flagflags |= FF_CROSSPRODUCT;
                         else
                                 flagflags = 0;
+                       pfree(pfind);
                 }
                 else
                 {
                         if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
                                 continue;
-                       lowerstr(repl);
-                       lowerstr(find);
-                       lowerstr(mask);
+                       prepl = lowerstr(repl);
+                       pfind = lowerstr(find);
+                       pmask = lowerstr(mask);
                         if (t_iseq(find, '0'))
                                 *find = '\0';
                         if (t_iseq(repl, '0'))
                                 *repl = '\0';
  
                         NIAddAffix(Conf, flag, flagflags, mask, find, repl, isSuffix ? FF_SUFFIX : FF_PREFIX);
+                       pfree(prepl);
+                       pfree(pfind);
+                       pfree(pmask);
                 }
         }
  
+       if (ptype)
+               pfree(ptype);
         fclose(affix);
  
         return 0;
@@ -1053,7 +1066,6 @@ NormalizeSubWord(IspellDict * Conf, char *word, char flag)
  
         if (wrdlen > MAXNORMLEN)
                 return NULL;
-       lowerstr(word);
         cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
         *cur = NULL;
  
@@ -1354,13 +1366,17 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
  }
  
  TSLexeme *
-NINormalizeWord(IspellDict * Conf, char *word)
+NINormalizeWord(IspellDict * Conf, char *uword)
  {
-       char      **res = NormalizeSubWord(Conf, word, 0);
+       char      **res;
+       char       *word;
         TSLexeme   *lcur = NULL,
                            *lres = NULL;
         uint16          NVariant = 1;
  
+       word = lowerstr(uword);
+       res = NormalizeSubWord(Conf, word, 0);
+
         if (res)
         {
                 char      **ptr = res;
@@ -1431,6 +1447,9 @@ NINormalizeWord(IspellDict * Conf, char *word)
                         var = ptr;
                 }
         }
+
+       pfree(word);
+
         return lres;
  }
  
diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c

index 73db8abba69a16f9bce6b404053668fa44c0eb12..b9b7699594ed53600b992bce7c947419dbc3c861 100644 (file)
--- a/contrib/tsearch2/stopword.c
+++ b/contrib/tsearch2/stopword.c
@@ -36,7 +36,7 @@ readstoplist(text *in, StopList * s)
         {
                 char       *filename = to_absfilename(text2char(in));
                 FILE       *hin;
-               char            buf[STOPBUFLEN];
+               char            buf[STOPBUFLEN], *pbuf;
                 int                     reallen = 0;
  
                 if ((hin = fopen(filename, "r")) == NULL)
@@ -49,7 +49,6 @@ readstoplist(text *in, StopList * s)
                 {
                         buf[strlen(buf) - 1] = '\0';
                         pg_verifymbstr(buf, strlen(buf), false);
-                       lowerstr(buf);
                         if (*buf == '\0')
                                 continue;
  
@@ -70,7 +69,14 @@ readstoplist(text *in, StopList * s)
                                 stop = tmp;
                         }
  
-                       stop[s->len] = strdup(buf);
+                       if (s->wordop) 
+                       {
+                               pbuf = s->wordop(buf);
+                               stop[s->len] = strdup(pbuf);
+                               pfree(pbuf);
+                       } else
+                               stop[s->len] = strdup(buf);
+
                         if (!stop[s->len])
                         {
                                 freestoplist(s);
@@ -79,8 +85,6 @@ readstoplist(text *in, StopList * s)
                                                 (errcode(ERRCODE_OUT_OF_MEMORY),
                                                  errmsg("out of memory")));
                         }
-                       if (s->wordop)
-                               stop[s->len] = (s->wordop) (stop[s->len]);
  
                         (s->len)++;
                 }
@@ -106,7 +110,5 @@ sortstoplist(StopList * s)
  bool
  searchstoplist(StopList * s, char *key)
  {
-       if (s->wordop)
-               key = (*(s->wordop)) (key);
         return (s->stop && s->len > 0 && bsearch(&key, s->stop, s->len, sizeof(char *), comparestr)) ? true : false;
  }
diff --git a/contrib/tsearch2/ts_locale.c b/contrib/tsearch2/ts_locale.c

index 203c977e4eab3006ddf7cb679d9f57fcdfb7640e..cac5317a1057140b6a598e6882fbd304786c9021 100644 (file)
--- a/contrib/tsearch2/ts_locale.c
+++ b/contrib/tsearch2/ts_locale.c
@@ -14,21 +14,12 @@ wchar2char(char *to, const wchar_t *from, size_t len)
  {
         if (GetDatabaseEncoding() == PG_UTF8)
         {
-               int                     r,
-                                       nbytes;
+               int                     r;
  
                 if (len == 0)
                         return 0;
  
-               /* in any case, *to should be allocated with enough space */
-               nbytes = WideCharToMultiByte(CP_UTF8, 0, from, len, NULL, 0, NULL, NULL);
-               if (nbytes == 0)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                                        errmsg("UTF-16 to UTF-8 translation failed: %lu",
-                                                       GetLastError())));
-
-               r = WideCharToMultiByte(CP_UTF8, 0, from, len, to, nbytes,
+               r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len,
                                                                 NULL, NULL);
  
                 if (r == 0)
@@ -36,6 +27,8 @@ wchar2char(char *to, const wchar_t *from, size_t len)
                                         (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
                                          errmsg("UTF-16 to UTF-8 translation failed: %lu",
                                                         GetLastError())));
+               Assert(r <= len);
+
                 return r;
         }
  
@@ -56,7 +49,7 @@ char2wchar(wchar_t *to, const char *from, size_t len)
  
                 if (!r)
                 {
-                       pg_verifymbstr(from, len, false);
+                       pg_verifymbstr(from, strlen(from), false);
                         ereport(ERROR,
                                         (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
                                          errmsg("invalid multibyte character for locale"),
@@ -97,6 +90,11 @@ char *
  lowerstr(char *str)
  {
         char       *ptr = str;
+       char       *out;
+       int                     len = strlen(str);
+
+       if ( len == 0 )
+               return pstrdup("");
  
  #ifdef TS_USE_WIDE
  
@@ -110,24 +108,67 @@ lowerstr(char *str)
         {
                 wchar_t    *wstr,
                                    *wptr;
-               int                     len = strlen(str);
+               int                 wlen;
+
+               /*
+                * alloc number of wchar_t for worst case: len contains
+                * number of bytes, which is >= number of characters; also
+                * alloc 1 wchar_t for 0, because wchar2char (really wcstombs)
+                * wants zero-terminated string
+                */
+               wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len+1));
+
+               /*
+                * str SHOULD be cstring, so wlen contains number
+                * of converted characters
+                */
+               wlen = char2wchar(wstr, str, len);
+               if ( wlen < 0 )
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+                                        errmsg("transalation failed from server encoding to wchar_t")));
+
+               Assert(wlen<=len);
+               wstr[wlen] = 0;
  
-               wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
-               char2wchar(wstr, str, len + 1);
                 while (*wptr)
                 {
                         *wptr = towlower((wint_t) *wptr);
                         wptr++;
                 }
-               wchar2char(str, wstr, len);
+
+               /*
+                * Alloc result string for worst case + '\0'
+                */
+               len = sizeof(char)*pg_database_encoding_max_length()*(wlen+1);
+               out = (char*)palloc(len);
+
+               /*
+                * wlen is now the number of bytes, which is always >= number of characters
+                */
+               wlen = wchar2char(out, wstr, len);
                 pfree(wstr);
+
+               if ( wlen < 0 )
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+                                        errmsg("transalation failed from wchar_t to server encoding %d", errno)));
+               Assert(wlen<=len);
+               out[wlen]='\0';
         }
         else
  #endif
+       {
+               char *outptr;
+
+               outptr = out = (char*)palloc( sizeof(char) * (len+1) );
                 while (*ptr)
                 {
-                       *ptr = tolower(*(unsigned char *) ptr);
+                       *outptr++ = tolower(*(unsigned char *) ptr);
                         ptr++;
                 }
-       return str;
+               *outptr = '\0';
+       }
+
+       return out;
  }
author	Teodor Sigaev <teodor@sigaev.ru>
	Mon, 20 Nov 2006 14:03:30 +0000 (14:03 +0000)
committer	Teodor Sigaev <teodor@sigaev.ru>
	Mon, 20 Nov 2006 14:03:30 +0000 (14:03 +0000)
contrib/tsearch2/dict_ex.c		patch \| blob \| history
contrib/tsearch2/dict_snowball.c		patch \| blob \| history
contrib/tsearch2/dict_syn.c		patch \| blob \| history
contrib/tsearch2/ispell/spell.c		patch \| blob \| history
contrib/tsearch2/stopword.c		patch \| blob \| history
contrib/tsearch2/ts_locale.c		patch \| blob \| history