]> granicus.if.org Git - postgresql/blobdiff - src/backend/tsearch/spell.c
Update copyright for 2014
[postgresql] / src / backend / tsearch / spell.c
index 71a77a1dcc2ec267f4cc0d95b04c4b8c9e3b16c3..1ca64423297d2a88a2dabbf89758a223ac41a6e5 100644 (file)
@@ -3,17 +3,18 @@
  * spell.c
  *             Normalizing word with ISpell
  *
- * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tsearch/spell.c,v 1.15 2009/01/29 16:22:10 teodor Exp $
+ *       src/backend/tsearch/spell.c
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#include "catalog/pg_collation.h"
 #include "tsearch/dicts/spell.h"
 #include "tsearch/ts_locale.h"
 #include "utils/memutils.h"
 
 /*
  * Initialization requires a lot of memory that's not needed
- * after the initialization is done.  In init function,
- * CurrentMemoryContext is a long lived memory context associated
- * with the dictionary cache entry, so we use a temporary context
- * for the short-lived stuff.
+ * after the initialization is done.  During initialization,
+ * CurrentMemoryContext is the long-lived memory context associated
+ * with the dictionary cache entry.  We keep the short-lived stuff
+ * in the Conf->buildCxt context.
  */
-static MemoryContext tmpCtx = NULL;
+#define tmpalloc(sz)  MemoryContextAlloc(Conf->buildCxt, (sz))
+#define tmpalloc0(sz)  MemoryContextAllocZero(Conf->buildCxt, (sz))
 
-#define tmpalloc(sz)  MemoryContextAlloc(tmpCtx, (sz))
-#define tmpalloc0(sz)  MemoryContextAllocZero(tmpCtx, (sz))
-
-static void
-checkTmpCtx(void)
+/*
+ * Prepare for constructing an ISpell dictionary.
+ *
+ * The IspellDict struct is assumed to be zeroed when allocated.
+ *
+ * Creates Conf->buildCxt, the memory context in which tmpalloc(),
+ * tmpalloc0() and lowerstr_ctx() place short-lived build data.  The
+ * context is deleted again by NIFinishBuild().
+ */
+void
+NIStartBuild(IspellDict *Conf)
 {
        /*
-        * XXX: This assumes that CurrentMemoryContext doesn't have any children
-        * other than the one we create here.
+        * The temp context is a child of CurTransactionContext, so that it will
+        * go away automatically on error.
         */
-       if (CurrentMemoryContext->firstchild == NULL)
+       Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
+                                                                                  "Ispell dictionary init context",
+                                                                                  ALLOCSET_DEFAULT_MINSIZE,
+                                                                                  ALLOCSET_DEFAULT_INITSIZE,
+                                                                                  ALLOCSET_DEFAULT_MAXSIZE);
+}
+
+/*
+ * Clean up when dictionary construction is complete.
+ *
+ * Deletes Conf->buildCxt (created by NIStartBuild) and clears the
+ * buildCxt, Spell and firstfree fields.  compact_palloc0() asserts that
+ * buildCxt is non-NULL, so no cpalloc-style allocation may follow.
+ *
+ * NOTE(review): firstfree is obtained with plain palloc0() in
+ * compact_palloc0(), i.e. not from buildCxt, and Conf->avail is not reset
+ * here; only the Assert in compact_palloc0() guards against later use.
+ */
+void
+NIFinishBuild(IspellDict *Conf)
+{
+       /* Release no-longer-needed temp memory */
+       MemoryContextDelete(Conf->buildCxt);
+       /* Just for cleanliness, zero the now-dangling pointers */
+       Conf->buildCxt = NULL;
+       Conf->Spell = NULL;
+       Conf->firstfree = NULL;
+}
+
+
+/*
+ * "Compact" palloc: allocate without extra palloc overhead.
+ *
+ * Since we have no need to free the ispell data items individually, there's
+ * not much value in the per-chunk overhead normally consumed by palloc.
+ * Getting rid of it is helpful since ispell can allocate a lot of small nodes.
+ *
+ * We currently pre-zero all data allocated this way, even though some of it
+ * doesn't need that.  The cpalloc and cpalloc0 macros are just documentation
+ * to indicate which allocations actually require zeroing.
+ *
+ * Conf: dictionary under construction; its firstfree/avail fields track the
+ *       unused tail of the current chunk.  Conf->buildCxt must be set,
+ *       i.e. the call must come after NIStartBuild() and before
+ *       NIFinishBuild() (checked with an Assert only).
+ * size: number of bytes wanted.
+ *
+ * Requests larger than COMPACT_MAX_REQ are handed straight to palloc0().
+ * Smaller ones are rounded up with MAXALIGN and carved out of the current
+ * chunk.  When the chunk cannot satisfy a request, a fresh
+ * COMPACT_ALLOC_CHUNK-byte chunk is palloc0'd and whatever was left of the
+ * previous chunk is abandoned.
+ */
+#define COMPACT_ALLOC_CHUNK 8192       /* amount to get from palloc at once */
+#define COMPACT_MAX_REQ                1024    /* must be < COMPACT_ALLOC_CHUNK */
+
+static void *
+compact_palloc0(IspellDict *Conf, size_t size)
+{
+       void       *result;
+
+       /* Should only be called during init */
+       Assert(Conf->buildCxt != NULL);
+
+       /* No point in this for large chunks */
+       if (size > COMPACT_MAX_REQ)
+               return palloc0(size);
+
+       /* Keep everything maxaligned */
+       size = MAXALIGN(size);
+
+       /* Need more space? */
+       if (size > Conf->avail)
        {
-               tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
-                                                                          "Ispell dictionary init context",
-                                                                          ALLOCSET_DEFAULT_MINSIZE,
-                                                                          ALLOCSET_DEFAULT_INITSIZE,
-                                                                          ALLOCSET_DEFAULT_MAXSIZE);
+               Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
+               Conf->avail = COMPACT_ALLOC_CHUNK;
        }
-       else
-               tmpCtx = CurrentMemoryContext->firstchild;
+
+       result = (void *) Conf->firstfree;
+       Conf->firstfree += size;
+       Conf->avail -= size;
+
+       return result;
 }
 
+#define cpalloc(size) compact_palloc0(Conf, size)
+#define cpalloc0(size) compact_palloc0(Conf, size)
+/*
+ * Duplicate the NUL-terminated string str into storage obtained with
+ * cpalloc(), i.e. compact_palloc0().
+ *
+ * str must not be NULL: strlen() is applied to it unconditionally.
+ * The result must not be pfree'd individually, since small requests are
+ * carved out of a shared chunk rather than being palloc chunks of their own.
+ */
 static char *
-lowerstr_ctx(char *src)
+cpstrdup(IspellDict *Conf, const char *str)
+{
+       char       *res = cpalloc(strlen(str) + 1);
+
+       strcpy(res, str);
+       return res;
+}
+
+
+/*
+ * Apply lowerstr(), producing a temporary result (in the buildCxt).
+ */
+static char *
+lowerstr_ctx(IspellDict *Conf, const char *src)
 {
        MemoryContext saveCtx;
        char       *dst;
 
-       saveCtx = MemoryContextSwitchTo(tmpCtx);
+       saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
        dst = lowerstr(src);
        MemoryContextSwitchTo(saveCtx);
 
@@ -67,7 +140,7 @@ lowerstr_ctx(char *src)
 #define MAXNORMLEN 256
 
 #define STRNCMP(s,p)   strncmp( (s), (p), strlen(p) )
-#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
+#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
 #define GETCHAR(A,N,T)   GETWCHAR( (A)->repl, (A)->replen, N, T )
 
 static char *VoidString = "";
@@ -75,12 +148,12 @@ static char *VoidString = "";
 /*
  * qsort() comparator for an array of SPELL pointers (see the qsort call on
  * Conf->Spell in NISortDictionary): orders entries by strcmp() of their
  * word fields.  s1 and s2 point at array elements, i.e. at SPELL * values;
  * the elements themselves are what is const here, hence "SPELL *const *".
  */
 static int
 cmpspell(const void *s1, const void *s2)
 {
-       return (strcmp((*(const SPELL **) s1)->word, (*(const SPELL **) s2)->word));
+       return (strcmp((*(SPELL *const *) s1)->word, (*(SPELL *const *) s2)->word));
 }
 /*
  * Comparator with the same calling convention as cmpspell: s1 and s2 point
  * at SPELL * array elements, which are ordered by strncmp() of their p.flag
  * fields over at most MAXFLAGLEN bytes.
  * NOTE(review): the caller is not visible in this excerpt.
  */
 static int
 cmpspellaffix(const void *s1, const void *s2)
 {
-       return (strncmp((*(const SPELL **) s1)->p.flag, (*(const SPELL **) s2)->p.flag, MAXFLAGLEN));
+       return (strncmp((*(SPELL *const *) s1)->p.flag, (*(SPELL *const *) s2)->p.flag, MAXFLAGLEN));
 }
 
 static char *
@@ -120,6 +193,7 @@ strbcmp(const unsigned char *s1, const unsigned char *s2)
 
        return 0;
 }
+
 static int
 strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
 {
@@ -170,7 +244,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
        {
                if (Conf->mspell)
                {
-                       Conf->mspell += 1024 * 20;
+                       Conf->mspell *= 2;
                        Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
                }
                else
@@ -196,8 +270,6 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
        tsearch_readline_state trst;
        char       *line;
 
-       checkTmpCtx();
-
        if (!tsearch_readline_begin(&trst, filename))
                ereport(ERROR,
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
@@ -242,7 +314,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
                        }
                        s += pg_mblen(s);
                }
-               pstr = lowerstr_ctx(line);
+               pstr = lowerstr_ctx(Conf, line);
 
                NIAddSpell(Conf, pstr, flag);
                pfree(pstr);
@@ -260,7 +332,7 @@ FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
        SPNodeData *StopLow,
                           *StopHigh,
                           *StopMiddle;
-       uint8      *ptr = (uint8 *) word;
+       const uint8 *ptr = (const uint8 *) word;
 
        flag &= FF_DICTFLAGMASK;
 
@@ -310,7 +382,7 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
        {
                if (Conf->maffixes)
                {
-                       Conf->maffixes += 16;
+                       Conf->maffixes *= 2;
                        Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
                }
                else
@@ -354,7 +426,9 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
                wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
                wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
 
-               err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen, REG_ADVANCED | REG_NOSUB);
+               err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
+                                                REG_ADVANCED | REG_NOSUB,
+                                                DEFAULT_COLLATION_OID);
                if (err)
                {
                        char            errstr[100];
@@ -375,9 +449,9 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
        Affix->flag = flag;
        Affix->type = type;
 
-       Affix->find = (find && *find) ? pstrdup(find) : VoidString;
+       Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
        if ((Affix->replen = strlen(repl)) > 0)
-               Affix->repl = pstrdup(repl);
+               Affix->repl = cpstrdup(Conf, repl);
        else
                Affix->repl = VoidString;
        Conf->naffixes++;
@@ -521,7 +595,7 @@ addFlagValue(IspellDict *Conf, char *s, uint32 val)
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                 errmsg("multibyte flag character is not allowed")));
 
-       Conf->flagval[*(unsigned char*) s] = (unsigned char) val;
+       Conf->flagval[*(unsigned char *) s] = (unsigned char) val;
        Conf->usecompound = true;
 }
 
@@ -545,8 +619,6 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
        char            scanbuf[BUFSIZ];
        char       *recoded;
 
-       checkTmpCtx();
-
        /* read file to find any flag */
        memset(Conf->flagval, 0, sizeof(Conf->flagval));
        Conf->usecompound = false;
@@ -624,7 +696,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
 
                if (ptype)
                        pfree(ptype);
-               ptype = lowerstr_ctx(type);
+               ptype = lowerstr_ctx(Conf, type);
                if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
                        goto nextline;
 
@@ -646,7 +718,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
 
                        if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
                                goto nextline;
-                       prepl = lowerstr_ctx(repl);
+                       prepl = lowerstr_ctx(Conf, repl);
                        /* affix flag */
                        if ((ptr = strchr(prepl, '/')) != NULL)
                        {
@@ -654,12 +726,12 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
                                ptr = repl + (ptr - prepl) + 1;
                                while (*ptr)
                                {
-                                       aflg |= Conf->flagval[*(unsigned char*) ptr];
+                                       aflg |= Conf->flagval[*(unsigned char *) ptr];
                                        ptr++;
                                }
                        }
-                       pfind = lowerstr_ctx(find);
-                       pmask = lowerstr_ctx(mask);
+                       pfind = lowerstr_ctx(Conf, find);
+                       pmask = lowerstr_ctx(Conf, mask);
                        if (t_iseq(find, '0'))
                                *pfind = '\0';
                        if (t_iseq(repl, '0'))
@@ -702,8 +774,6 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
        bool            oldformat = false;
        char       *recoded = NULL;
 
-       checkTmpCtx();
-
        if (!tsearch_readline_begin(&trst, filename))
                ereport(ERROR,
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
@@ -735,7 +805,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
 
                                if (*s && pg_mblen(s) == 1)
                                {
-                                       Conf->flagval[*(unsigned char*) s] = FF_COMPOUNDFLAG;
+                                       Conf->flagval[*(unsigned char *) s] = FF_COMPOUNDFLAG;
                                        Conf->usecompound = true;
                                }
                                oldformat = true;
@@ -791,7 +861,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
                                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
                                                 errmsg("multibyte flag character is not allowed")));
 
-                       flag = *(unsigned char*) s;
+                       flag = *(unsigned char *) s;
                        goto nextline;
                }
                if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 || STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
@@ -833,8 +903,9 @@ MergeAffix(IspellDict *Conf, int a1, int a2)
        }
 
        ptr = Conf->AffixData + Conf->nAffixData;
-       *ptr = palloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) +
-                                 1 /* space */ + 1 /* \0 */ );
+       *ptr = cpalloc(strlen(Conf->AffixData[a1]) +
+                                  strlen(Conf->AffixData[a2]) +
+                                  1 /* space */ + 1 /* \0 */ );
        sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
        ptr++;
        *ptr = NULL;
@@ -851,7 +922,7 @@ makeCompoundFlags(IspellDict *Conf, int affix)
 
        while (str && *str)
        {
-               flag |= Conf->flagval[*(unsigned char*) str];
+               flag |= Conf->flagval[*(unsigned char *) str];
                str++;
        }
 
@@ -878,7 +949,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
        if (!nchar)
                return NULL;
 
-       rs = (SPNode *) palloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
+       rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
        rs->length = nchar;
        data = rs->data;
 
@@ -945,8 +1016,6 @@ NISortDictionary(IspellDict *Conf)
        int                     naffix = 0;
        int                     curaffix;
 
-       checkTmpCtx();
-
        /* compress affixes */
 
        /* Count the number of different flags used in the dictionary */
@@ -974,7 +1043,7 @@ NISortDictionary(IspellDict *Conf)
                {
                        curaffix++;
                        Assert(curaffix < naffix);
-                       Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag);
+                       Conf->AffixData[curaffix] = cpstrdup(Conf, Conf->Spell[i]->p.flag);
                }
 
                Conf->Spell[i]->p.d.affix = curaffix;
@@ -985,8 +1054,6 @@ NISortDictionary(IspellDict *Conf)
 
        qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
        Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
-
-       Conf->Spell = NULL;
 }
 
 static AffixNode *
@@ -1014,7 +1081,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
        aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
        naff = 0;
 
-       rs = (AffixNode *) palloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
+       rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
        rs->length = nchar;
        data = rs->data;
 
@@ -1030,7 +1097,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
                                        if (naff)
                                        {
                                                data->naff = naff;
-                                               data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
+                                               data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
                                                memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
                                                naff = 0;
                                        }
@@ -1050,7 +1117,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
        if (naff)
        {
                data->naff = naff;
-               data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
+               data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
                memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
                naff = 0;
        }
@@ -1091,7 +1158,7 @@ mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
        if (cnt == 0)
                return;
 
-       Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt);
+       Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
        Affix->data->naff = (uint32) cnt;
 
        cnt = 0;
@@ -1123,8 +1190,6 @@ NISortAffixes(IspellDict *Conf)
        CMPDAffix  *ptr;
        int                     firstsuffix = Conf->naffixes;
 
-       checkTmpCtx();
-
        if (Conf->naffixes == 0)
                return;
 
@@ -1306,7 +1371,7 @@ addToResult(char **forms, char **cur, char *word)
        if (forms == cur || strcmp(word, *(cur - 1)) != 0)
        {
                *cur = pstrdup(word);
-               *(cur+1) = NULL;
+               *(cur + 1) = NULL;
                return 1;
        }
 
@@ -1497,7 +1562,7 @@ CopyVar(SplitVar *s, int makedup)
 static void
 AddStem(SplitVar *v, char *word)
 {
-       if ( v->nstem >= v->lenstem )
+       if (v->nstem >= v->lenstem)
        {
                v->lenstem *= 2;
                v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
@@ -1546,8 +1611,8 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
                        if (level + lenaff - 1 <= minpos)
                                continue;
 
-                       if ( lenaff >= MAXNORMLEN )
-                               continue; /* skip too big value */
+                       if (lenaff >= MAXNORMLEN)
+                               continue;               /* skip too big value */
                        if (lenaff > 0)
                                memcpy(buf, word + startpos, lenaff);
                        buf[lenaff] = '\0';
@@ -1570,7 +1635,7 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
 
                                while (*sptr)
                                {
-                                       AddStem( new, *sptr ); 
+                                       AddStem(new, *sptr);
                                        sptr++;
                                }
                                pfree(subres);
@@ -1621,7 +1686,7 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
                                        if (wordlen == level + 1)
                                        {
                                                /* well, it was last word */
-                                               AddStem( var, pnstrdup(word + startpos, wordlen - startpos) );
+                                               AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
                                                pfree(notprobed);
                                                return var;
                                        }
@@ -1635,7 +1700,7 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
                                                ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
                                                /* we can find next word */
                                                level++;
-                                               AddStem( var, pnstrdup(word + startpos, level - startpos) );
+                                               AddStem(var, pnstrdup(word + startpos, level - startpos));
                                                node = Conf->Dictionary;
                                                startpos = level;
                                                continue;
@@ -1649,18 +1714,19 @@ SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int
                level++;
        }
 
-       AddStem( var, pnstrdup(word + startpos, wordlen - startpos) );
+       AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
        pfree(notprobed);
        return var;
 }
 
 static void
-addNorm( TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
+addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
 {
-       if ( *lres == NULL ) 
+       if (*lres == NULL)
                *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
 
-       if ( *lcur - *lres < MAX_NORM-1 ) { 
+       if (*lcur - *lres < MAX_NORM - 1)
+       {
                (*lcur)->lexeme = word;
                (*lcur)->flags = flags;
                (*lcur)->nvariant = NVariant;
@@ -1683,9 +1749,9 @@ NINormalizeWord(IspellDict *Conf, char *word)
        {
                char      **ptr = res;
 
-               while (*ptr && (lcur-lres) < MAX_NORM)
+               while (*ptr && (lcur - lres) < MAX_NORM)
                {
-                       addNorm( &lres, &lcur, *ptr, 0, NVariant++);
+                       addNorm(&lres, &lcur, *ptr, 0, NVariant++);
                        ptr++;
                }
                pfree(res);
@@ -1712,10 +1778,10 @@ NINormalizeWord(IspellDict *Conf, char *word)
                                        {
                                                for (i = 0; i < var->nstem - 1; i++)
                                                {
-                                                       addNorm( &lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant); 
+                                                       addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
                                                }
 
-                                               addNorm( &lres, &lcur, *subptr, 0, NVariant); 
+                                               addNorm(&lres, &lcur, *subptr, 0, NVariant);
                                                subptr++;
                                                NVariant++;
                                        }