Update copyright for 2014

[postgresql] / src / backend / tsearch / spell.c
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c

index 71a77a1dcc2ec267f4cc0d95b04c4b8c9e3b16c3..1ca64423297d2a88a2dabbf89758a223ac41a6e5 100644 (file)
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
@@ -3,17 +3,18 @@
   * spell.c
   *             Normalizing word with ISpell
   *
- * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tsearch/spell.c,v 1.15 2009/01/29 16:22:10 teodor Exp $
+ *       src/backend/tsearch/spell.c
   *
   *-------------------------------------------------------------------------
   */
  
  #include "postgres.h"
  
+#include "catalog/pg_collation.h"
  #include "tsearch/dicts/spell.h"
  #include "tsearch/ts_locale.h"
  #include "utils/memutils.h"
@@ -21,42 +22,114 @@
  
  /*
   * Initialization requires a lot of memory that's not needed
- * after the initialization is done.  In init function,
- * CurrentMemoryContext is a long lived memory context associated
- * with the dictionary cache entry, so we use a temporary context
- * for the short-lived stuff.
+ * after the initialization is done.  During initialization,
+ * CurrentMemoryContext is the long-lived memory context associated
+ * with the dictionary cache entry.  We keep the short-lived stuff
+ * in the Conf->buildCxt context.
   */
-static MemoryContext tmpCtx = NULL;
+#define tmpalloc(sz)  MemoryContextAlloc(Conf->buildCxt, (sz))
+#define tmpalloc0(sz)  MemoryContextAllocZero(Conf->buildCxt, (sz))
  
-#define tmpalloc(sz)  MemoryContextAlloc(tmpCtx, (sz))
-#define tmpalloc0(sz)  MemoryContextAllocZero(tmpCtx, (sz))
-
-static void
-checkTmpCtx(void)
+/*
+ * Prepare for constructing an ISpell dictionary.
+ *
+ * The IspellDict struct is assumed to be zeroed when allocated.
+ */
+void
+NIStartBuild(IspellDict *Conf)
  {
         /*
-        * XXX: This assumes that CurrentMemoryContext doesn't have any children
-        * other than the one we create here.
+        * The temp context is a child of CurTransactionContext, so that it will
+        * go away automatically on error.
          */
-       if (CurrentMemoryContext->firstchild == NULL)
+       Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
+                                                                                  "Ispell dictionary init context",
+                                                                                  ALLOCSET_DEFAULT_MINSIZE,
+                                                                                  ALLOCSET_DEFAULT_INITSIZE,
+                                                                                  ALLOCSET_DEFAULT_MAXSIZE);
+}
+
+/*
+ * Clean up when dictionary construction is complete.
+ */
+void
+NIFinishBuild(IspellDict *Conf)
+{
+       /* Release no-longer-needed temp memory */
+       MemoryContextDelete(Conf->buildCxt);
+       /* Just for cleanliness, zero the now-dangling pointers */
+       Conf->buildCxt = NULL;
+       Conf->Spell = NULL;
+       Conf->firstfree = NULL;
+}
+
+
+/*
+ * "Compact" palloc: allocate without extra palloc overhead.
+ *
+ * Since we have no need to free the ispell data items individually, there's
+ * not much value in the per-chunk overhead normally consumed by palloc.
+ * Getting rid of it is helpful since ispell can allocate a lot of small nodes.
+ *
+ * We currently pre-zero all data allocated this way, even though some of it
+ * doesn't need that.  The cpalloc and cpalloc0 macros are just documentation
+ * to indicate which allocations actually require zeroing.
+ */
+#define COMPACT_ALLOC_CHUNK 8192       /* amount to get from palloc at once */
+#define COMPACT_MAX_REQ                1024    /* must be < COMPACT_ALLOC_CHUNK */
+
+static void *
+compact_palloc0(IspellDict *Conf, size_t size)
+{
+       void       *result;
+
+       /* Should only be called during init */
+       Assert(Conf->buildCxt != NULL);
+
+       /* No point in this for large chunks */
+       if (size > COMPACT_MAX_REQ)
+               return palloc0(size);
+
+       /* Keep everything maxaligned */
+       size = MAXALIGN(size);
+
+       /* Need more space? */
+       if (size > Conf->avail)
         {
-               tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
-                                                                          "Ispell dictionary init context",
-                                                                          ALLOCSET_DEFAULT_MINSIZE,
-                                                                          ALLOCSET_DEFAULT_INITSIZE,
-                                                                          ALLOCSET_DEFAULT_MAXSIZE);
+               Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
+               Conf->avail = COMPACT_ALLOC_CHUNK;
         }
-       else
-               tmpCtx = CurrentMemoryContext->firstchild;
+
+       result = (void *) Conf->firstfree;
+       Conf->firstfree += size;
+       Conf->avail -= size;
+
+       return result;
  }
  
+#define cpalloc(size) compact_palloc0(Conf, size)
+#define cpalloc0(size) compact_palloc0(Conf, size)
+
  static char *
-lowerstr_ctx(char *src)
+cpstrdup(IspellDict *Conf, const char *str)
+{
+       char       *res = cpalloc(strlen(str) + 1);
+
+       strcpy(res, str);
+       return res;
+}
+
+
+/*
+ * Apply lowerstr(), producing a temporary result (in the buildCxt).
+ */
+static char *
+lowerstr_ctx(IspellDict *Conf, const char *src)
  {
         MemoryContext saveCtx;
         char       *dst;
  
-       saveCtx = MemoryContextSwitchTo(tmpCtx);
+       saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
         dst = lowerstr(src);
         MemoryContextSwitchTo(saveCtx);
  
@@ -67,7 +140,7 @@ lowerstr_ctx(char *src)
  #define MAXNORMLEN 256
  
  #define STRNCMP(s,p)   strncmp( (s), (p), strlen(p) )
-#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
+#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
  #define GETCHAR(A,N,T)   GETWCHAR( (A)->repl, (A)->replen, N, T )
  
  static char *VoidString = "";
@@ -75,12 +148,12 @@ static char *VoidString = "";
  static int
  cmpspell(const void *s1, const void *s2)
  {
-       return (strcmp((*(const SPELL **) s1)->word, (*(const SPELL **) s2)->word));
+       return (strcmp((*(SPELL *const *) s1)->word, (*(SPELL *const *) s2)->word));
  }
  static int
  cmpspellaffix(const void *s1, const void *s2)
  {
-       return (strncmp((*(const SPELL **) s1)->p.flag, (*(const SPELL **) s2)->p.flag, MAXFLAGLEN));
+       return (strncmp((*(SPELL *const *) s1)->p.flag, (*(SPELL *const *) s2)->p.flag, MAXFLAGLEN));
  }
  
  static char *
@@ -120,6 +193,7 @@ strbcmp(const unsigned char *s1, const unsigned char *s2)
  
         return 0;
  }
+
  static int
  strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
  {
@@ -170,7 +244,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
         {
                 if (Conf->mspell)
                 {
-                       Conf->mspell += 1024 * 20;
+                       Conf->mspell *= 2;
                         Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
                 }
                 else
@@ -196,8 +270,6 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
         tsearch_readline_state trst;
         char       *line;
  
-       checkTmpCtx();
-
         if (!tsearch_readline_begin(&trst, filename))
                 ereport(ERROR,
                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
@@ -242,7 +314,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
                         }
                         s += pg_mblen(s);
                 }
-               pstr = lowerstr_ctx(line);
+               pstr = lowerstr_ctx(Conf, line);
  
                 NIAddSpell(Conf, pstr, flag);
                 pfree(pstr);
@@ -260,7 +332,7 @@ FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
         SPNodeData *StopLow,
                            *StopHigh,
                            *StopMiddle;
-       uint8      *ptr = (uint8 *) word;
+       const uint8 *ptr = (const uint8 *) word;
  
         flag &= FF_DICTFLAGMASK;
  
@@ -310,7 +382,7 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
         {
                 if (Conf->maffixes)
                 {
-                       Conf->maffixes += 16;
+                       Conf->maffixes *= 2;
                         Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
                 }
                 else
@@ -354,7 +426,9 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
                 wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
                 wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
  
-               err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen, REG_ADVANCED | REG_NOSUB);
+               err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
+                                                REG_ADVANCED | REG_NOSUB,
+                                                DEFAULT_COLLATION_OID);
                 if (err)
                 {
                         char            errstr[100];
@@ -375,9 +449,9 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
         Affix->flag = flag;
         Affix->type = type;
  
-       Affix->find = (find && *find) ? pstrdup(find) : VoidString;
+       Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
         if ((Affix->replen = strlen(repl)) > 0)
-               Affix->repl = pstrdup(repl);
+               Affix->repl = cpstrdup(Conf, repl);
         else
                 Affix->repl = VoidString;
         Conf->naffixes++;
@@ -521,7 +595,7 @@ addFlagValue(IspellDict *Conf, char *s, uint32 val)
                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                  errmsg("multibyte flag character is not allowed")));
  
-       Conf->flagval[*(unsigned char*) s] = (unsigned char) val;
+       Conf->flagval[*(unsigned char *) s] = (unsigned char) val;
         Conf->usecompound = true;
  }
  
@@ -545,8 +619,6 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
         char            scanbuf[BUFSIZ];
         char       *recoded;
  
-       checkTmpCtx();
-
         /* read file to find any flag */
         memset(Conf->flagval, 0, sizeof(Conf->flagval));
         Conf->usecompound = false;
@@ -624,7 +696,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
  
                 if (ptype)
                         pfree(ptype);
-               ptype = lowerstr_ctx(type);
+               ptype = lowerstr_ctx(Conf, type);
                 if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
                         goto nextline;
  
@@ -646,7 +718,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
  
                         if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
                                 goto nextline;
-                       prepl = lowerstr_ctx(repl);
+                       prepl = lowerstr_ctx(Conf, repl);
                         /* affix flag */
                         if ((ptr = strchr(prepl, '/')) != NULL)
                         {
@@ -654,12 +726,12 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
                                 ptr = repl + (ptr - prepl) + 1;
                                 while (*ptr)
                                 {
-                                       aflg |= Conf->flagval[*(unsigned char*) ptr];
+                                       aflg |= Conf->flagval[*(unsigned char *) ptr];
                                         ptr++;
                                 }
                         }
-                       pfind = lowerstr_ctx(find);
-                       pmask = lowerstr_ctx(mask);
+                       pfind = lowerstr_ctx(Conf, find);
+                       pmask = lowerstr_ctx(Conf, mask);
                         if (t_iseq(find, '0'))
                                 *pfind = '\0';
                         if (t_iseq(repl, '0'))
@@ -702,8 +774,6 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
         bool            oldformat = false;
         char       *recoded = NULL;
  
-       checkTmpCtx();
-
         if (!tsearch_readline_begin(&trst, filename))
                 ereport(ERROR,
                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
@@ -735,7 +805,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
  
                                 if (*s && pg_mblen(s) == 1)
                                 {
-                                       Conf->flagval[*(unsigned char*) s] = FF_COMPOUNDFLAG;
+                                       Conf->flagval[*(unsigned char *) s] = FF_COMPOUNDFLAG;
                                         Conf->usecompound = true;
                                 }
                                 oldformat = true;
@@ -791,7 +861,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                                  errmsg("multibyte flag character is not allowed")));
  
-                       flag = *(unsigned char*) s;
+                       flag = *(unsigned char *) s;
                         goto nextline;
                 }
                 if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 || STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
@@ -833,8 +903,9 @@ MergeAffix(IspellDict *Conf, int a1, int a2)
         }
  
         ptr = Conf->AffixData + Conf->nAffixData;
-       *ptr = palloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) +
-                                 1 /* space */ + 1 /* \0 */ );
+       *ptr = cpalloc(strlen(Conf->AffixData[a1]) +
+                                  strlen(Conf->AffixData[a2]) +
+                                  1 /* space */ + 1 /* \0 */ );
         sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
         ptr++;
         *ptr = NULL;
@@ -851,7 +922,7 @@ makeCompoundFlags(IspellDict *Conf, int affix)
  
         while (str && *str)
         {
-               flag |= Conf->flagval[*(unsigned char*) str];
+               flag |= Conf->flagval[*(unsigned char *) str];
                 str++;
         }
  
@@ -878,7 +949,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
         if (!nchar)
                 return NULL;
  
-       rs = (SPNode *) palloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
+       rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
         rs->length = nchar;
         data = rs->data;
  
@@ -945,8 +1016,6 @@ NISortDictionary(IspellDict *Conf)
         int                     naffix = 0;
         int                     curaffix;
  
-       checkTmpCtx();
-
         /* compress affixes */
  
         /* Count the number of different flags used in the dictionary */
@@ -974,7 +1043,7 @@ NISortDictionary(IspellDict *Conf)
                 {
                         curaffix++;
                         Assert(curaffix < naffix);
-                       Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag);
+                       Conf->AffixData[curaffix] = cpstrdup(Conf, Conf->Spell[i]->p.flag);
                 }
  
                 Conf->Spell[i]->p.d.affix = curaffix;
@@ -985,8 +1054,6 @@ NISortDictionary(IspellDict *Conf)
  
         qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
         Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
-
-       Conf->Spell = NULL;
  }
  
  static AffixNode *
@@ -1014,7 +1081,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
         aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
         naff = 0;
  
-       rs = (AffixNode *) palloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
+       rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
         rs->length = nchar;
         data = rs->data;
  
@@ -1030,7 +1097,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
                                         if (naff)
                                         {
                                                 data->naff = naff;
-                                               data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
+                                               data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
                                                 memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
                                                 naff = 0;
                                         }
@@ -1050,7 +1117,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
         if (naff)
         {
                 data->naff = naff;
-               data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
+               data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
                 memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
                 naff = 0;
         }
@@ -1091,7 +1158,7 @@ mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
         if (cnt == 0)
                 return;
  
-       Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt);
+       Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
         Affix->data->naff = (uint32) cnt;
  
         cnt = 0;
@@ -1123,8 +1190,6 @@ NISortAffixes(IspellDict *Conf)
         CMPDAffix  *ptr;
         int                     firstsuffix = Conf->naffixes;
  
-       checkTmpCtx();
-
         if (Conf->naffixes == 0)
                 return;
  
@@ -1306,7 +1371,7 @@ addToResult(char **forms, char **cur, char *word)
         if (forms == cur || strcmp(word, *(cur - 1)) != 0)
         {
                 *cur = pstrdup(word);
-               *(cur+1) = NULL;
+               *(cur + 1) = NULL;
                 return 1;
         }
  
@@ -1497,7 +1562,7 @@ CopyVar(SplitVar *s, int makedup)
  static void
  AddStem(SplitVar *v, char *word)
  {
-       if ( v->nstem >= v->lenstem )
+       if (v->nstem >= v->lenstem)
         {
                 v->lenstem *= 2;
                 v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
@@ -1546,8 +1611,8 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
                         if (level + lenaff - 1 <= minpos)
                                 continue;
  
-                       if ( lenaff >= MAXNORMLEN )
-                               continue; /* skip too big value */
+                       if (lenaff >= MAXNORMLEN)
+                               continue;               /* skip too big value */
                         if (lenaff > 0)
                                 memcpy(buf, word + startpos, lenaff);
                         buf[lenaff] = '\0';
@@ -1570,7 +1635,7 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
  
                                 while (*sptr)
                                 {
-                                       AddStem( new, *sptr ); 
+                                       AddStem(new, *sptr);
                                         sptr++;
                                 }
                                 pfree(subres);
@@ -1621,7 +1686,7 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
                                         if (wordlen == level + 1)
                                         {
                                                 /* well, it was last word */
-                                               AddStem( var, pnstrdup(word + startpos, wordlen - startpos) );
+                                               AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
                                                 pfree(notprobed);
                                                 return var;
                                         }
@@ -1635,7 +1700,7 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
                                                 ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
                                                 /* we can find next word */
                                                 level++;
-                                               AddStem( var, pnstrdup(word + startpos, level - startpos) );
+                                               AddStem(var, pnstrdup(word + startpos, level - startpos));
                                                 node = Conf->Dictionary;
                                                 startpos = level;
                                                 continue;
@@ -1649,18 +1714,19 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
                 level++;
         }
  
-       AddStem( var, pnstrdup(word + startpos, wordlen - startpos) );
+       AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
         pfree(notprobed);
         return var;
  }
  
  static void
-addNorm( TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
+addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
  {
-       if ( *lres == NULL ) 
+       if (*lres == NULL)
                 *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
  
-       if ( *lcur - *lres < MAX_NORM-1 ) { 
+       if (*lcur - *lres < MAX_NORM - 1)
+       {
                 (*lcur)->lexeme = word;
                 (*lcur)->flags = flags;
                 (*lcur)->nvariant = NVariant;
@@ -1683,9 +1749,9 @@ NINormalizeWord(IspellDict *Conf, char *word)
         {
                 char      **ptr = res;
  
-               while (*ptr && (lcur-lres) < MAX_NORM)
+               while (*ptr && (lcur - lres) < MAX_NORM)
                 {
-                       addNorm( &lres, &lcur, *ptr, 0, NVariant++);
+                       addNorm(&lres, &lcur, *ptr, 0, NVariant++);
                         ptr++;
                 }
                 pfree(res);
@@ -1712,10 +1778,10 @@ NINormalizeWord(IspellDict *Conf, char *word)
                                         {
                                                 for (i = 0; i < var->nstem - 1; i++)
                                                 {
-                                                       addNorm( &lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant); 
+                                                       addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
                                                 }
  
-                                               addNorm( &lres, &lcur, *subptr, 0, NVariant); 
+                                               addNorm(&lres, &lcur, *subptr, 0, NVariant);
                                                 subptr++;
                                                 NVariant++;
                                         }