From 3e5f9412d0a818be77c974e5af710928097b91f3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 6 Oct 2010 19:31:05 -0400 Subject: [PATCH] Reduce the memory requirement for large ispell dictionaries. This patch eliminates per-chunk palloc overhead for most small allocations needed in the representation of an ispell dictionary. This saves close to a factor of 2 on the current Czech ispell data. While it doesn't cover every last small allocation in the ispell code, we are at the point of diminishing returns, because about 95% of the allocations are covered already. Pavel Stehule, rather heavily revised by Tom --- src/backend/tsearch/spell.c | 82 ++++++++++++++++++++++++++----- src/include/tsearch/dicts/spell.h | 4 ++ 2 files changed, 74 insertions(+), 12 deletions(-) diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index cd7ada6613..713833986a 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -59,6 +59,63 @@ NIFinishBuild(IspellDict *Conf) /* Just for cleanliness, zero the now-dangling pointers */ Conf->buildCxt = NULL; Conf->Spell = NULL; + Conf->firstfree = NULL; +} + + +/* + * "Compact" palloc: allocate without extra palloc overhead. + * + * Since we have no need to free the ispell data items individually, there's + * not much value in the per-chunk overhead normally consumed by palloc. + * Getting rid of it is helpful since ispell can allocate a lot of small nodes. + * + * We currently pre-zero all data allocated this way, even though some of it + * doesn't need that. The cpalloc and cpalloc0 macros are just documentation + * to indicate which allocations actually require zeroing. + */ +#define COMPACT_ALLOC_CHUNK 8192 /* must be > aset.c's allocChunkLimit */ +#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */ + +static void * +compact_palloc0(IspellDict *Conf, size_t size) +{ + void *result; + + /* Should only be called during init */ + Assert(Conf->buildCxt != NULL); + + /* No point in this for large chunks */ + if (size > COMPACT_MAX_REQ) + return palloc0(size); + + /* Keep everything maxaligned */ + size = MAXALIGN(size); + + /* Need more space? */ + if (size > Conf->avail) + { + Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK); + Conf->avail = COMPACT_ALLOC_CHUNK; + } + + result = (void *) Conf->firstfree; + Conf->firstfree += size; + Conf->avail -= size; + + return result; +} + +#define cpalloc(size) compact_palloc0(Conf, size) +#define cpalloc0(size) compact_palloc0(Conf, size) + +static char * +cpstrdup(IspellDict *Conf, const char *str) +{ + char *res = cpalloc(strlen(str) + 1); + + strcpy(res, str); + return res; } @@ -186,7 +243,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag) { if (Conf->mspell) { - Conf->mspell += 1024 * 20; + Conf->mspell *= 2; Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *)); } else @@ -324,7 +381,7 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c { if (Conf->maffixes) { - Conf->maffixes += 16; + Conf->maffixes *= 2; Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX)); } else @@ -389,9 +446,9 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c Affix->flag = flag; Affix->type = type; - Affix->find = (find && *find) ? pstrdup(find) : VoidString; + Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString; if ((Affix->replen = strlen(repl)) > 0) - Affix->repl = pstrdup(repl); + Affix->repl = cpstrdup(Conf, repl); else Affix->repl = VoidString; Conf->naffixes++; @@ -843,8 +900,9 @@ MergeAffix(IspellDict *Conf, int a1, int a2) } ptr = Conf->AffixData + Conf->nAffixData; - *ptr = palloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + - 1 /* space */ + 1 /* \0 */ ); + *ptr = cpalloc(strlen(Conf->AffixData[a1]) + + strlen(Conf->AffixData[a2]) + + 1 /* space */ + 1 /* \0 */ ); sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]); ptr++; *ptr = NULL; @@ -888,7 +946,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) if (!nchar) return NULL; - rs = (SPNode *) palloc0(SPNHDRSZ + nchar * sizeof(SPNodeData)); + rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData)); rs->length = nchar; data = rs->data; @@ -982,7 +1040,7 @@ NISortDictionary(IspellDict *Conf) { curaffix++; Assert(curaffix < naffix); - Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag); + Conf->AffixData[curaffix] = cpstrdup(Conf, Conf->Spell[i]->p.flag); } Conf->Spell[i]->p.d.affix = curaffix; @@ -1020,7 +1078,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1)); naff = 0; - rs = (AffixNode *) palloc0(ANHRDSZ + nchar * sizeof(AffixNodeData)); + rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData)); rs->length = nchar; data = rs->data; @@ -1036,7 +1094,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) if (naff) { data->naff = naff; - data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff); + data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff); memcpy(data->aff, aff, sizeof(AFFIX *) * naff); naff = 0; } @@ -1056,7 +1114,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) if (naff) { data->naff = naff; - data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff); + data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff); memcpy(data->aff, aff, sizeof(AFFIX *) * naff); naff = 0; } @@ -1097,7 +1155,7 @@ mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix) if (cnt == 0) return; - Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt); + Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt); Affix->data->naff = (uint32) cnt; cnt = 0; diff --git a/src/include/tsearch/dicts/spell.h b/src/include/tsearch/dicts/spell.h index 421a636dc7..b41fbb6fd5 100644 --- a/src/include/tsearch/dicts/spell.h +++ b/src/include/tsearch/dicts/spell.h @@ -161,6 +161,10 @@ typedef struct SPELL **Spell; int nspell; /* number of valid entries in Spell array */ int mspell; /* allocated length of Spell array */ + + /* These are used to allocate "compact" data without palloc overhead */ + char *firstfree; /* first free address (always maxaligned) */ + size_t avail; /* free space remaining at firstfree */ } IspellDict; extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word); -- 2.40.0