Commit 3e5f9412 authored by Tom Lane

Reduce the memory requirement for large ispell dictionaries.

This patch eliminates per-chunk palloc overhead for most small allocations
needed in the representation of an ispell dictionary.  This saves close to
a factor of 2 on the current Czech ispell data.  While it doesn't cover
every last small allocation in the ispell code, we are at the point of
diminishing returns, because about 95% of the allocations are covered
already.

Pavel Stehule, rather heavily revised by Tom
parent 9b910def
...@@ -59,6 +59,63 @@ NIFinishBuild(IspellDict *Conf) ...@@ -59,6 +59,63 @@ NIFinishBuild(IspellDict *Conf)
/* Just for cleanliness, zero the now-dangling pointers */ /* Just for cleanliness, zero the now-dangling pointers */
Conf->buildCxt = NULL; Conf->buildCxt = NULL;
Conf->Spell = NULL; Conf->Spell = NULL;
Conf->firstfree = NULL;
}
/*
* "Compact" palloc: allocate without extra palloc overhead.
*
* Since we have no need to free the ispell data items individually, there's
* not much value in the per-chunk overhead normally consumed by palloc.
* Getting rid of it is helpful since ispell can allocate a lot of small nodes.
*
* We currently pre-zero all data allocated this way, even though some of it
* doesn't need that. The cpalloc and cpalloc0 macros are just documentation
* to indicate which allocations actually require zeroing.
*/
/* Size of each block that compact_palloc0 requests from palloc0 to carve up */
#define COMPACT_ALLOC_CHUNK 8192 /* must be > aset.c's allocChunkLimit */
/* Requests larger than this skip the shared block and go straight to palloc0 */
#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */
/*
 * compact_palloc0
 *		Hand out "size" bytes of pre-zeroed memory for dictionary Conf.
 *
 * Requests above COMPACT_MAX_REQ are passed directly to palloc0.  Smaller
 * requests are rounded up with MAXALIGN and carved sequentially out of a
 * COMPACT_ALLOC_CHUNK-byte block tracked by Conf->firstfree / Conf->avail.
 * When the current block cannot satisfy a request, a new block is obtained
 * from palloc0 and whatever remained of the previous block is left unused.
 *
 * Returns a pointer to the start of the allotted space.
 */
static void *
compact_palloc0(IspellDict *Conf, size_t size)
{
	char	   *chunk;
	size_t		needed;

	/* Only legal while the dictionary build is in progress */
	Assert(Conf->buildCxt != NULL);

	/* Big requests gain nothing from compaction; let palloc0 serve them */
	if (size > COMPACT_MAX_REQ)
		return palloc0(size);

	/* Round up so that firstfree stays maxaligned after this request */
	needed = MAXALIGN(size);

	/* Current block too small?  Then start a fresh, zeroed one */
	if (Conf->avail < needed)
	{
		Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
		Conf->avail = COMPACT_ALLOC_CHUNK;
	}

	/* Carve the request off the front of the current block */
	chunk = Conf->firstfree;
	Conf->firstfree = chunk + needed;
	Conf->avail -= needed;

	return (void *) chunk;
}
/*
 * Both macros expand to the same compact_palloc0 call and rely on a variable
 * named Conf being in scope at the point of use.
 */
#define cpalloc(size) compact_palloc0(Conf, size)
#define cpalloc0(size) compact_palloc0(Conf, size)
/*
 * Duplicate the NUL-terminated string "str" into space obtained through
 * cpalloc (that is, compact_palloc0 on Conf) and return the copy.
 */
static char *
cpstrdup(IspellDict *Conf, const char *str)
{
/* strlen + 1 leaves room for the terminating NUL that strcpy writes */
char *res = cpalloc(strlen(str) + 1);
strcpy(res, str);
return res;
} }
...@@ -186,7 +243,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag) ...@@ -186,7 +243,7 @@ NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
{ {
if (Conf->mspell) if (Conf->mspell)
{ {
Conf->mspell += 1024 * 20; Conf->mspell *= 2;
Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *)); Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
} }
else else
...@@ -324,7 +381,7 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c ...@@ -324,7 +381,7 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
{ {
if (Conf->maffixes) if (Conf->maffixes)
{ {
Conf->maffixes += 16; Conf->maffixes *= 2;
Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX)); Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
} }
else else
...@@ -389,9 +446,9 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c ...@@ -389,9 +446,9 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
Affix->flag = flag; Affix->flag = flag;
Affix->type = type; Affix->type = type;
Affix->find = (find && *find) ? pstrdup(find) : VoidString; Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
if ((Affix->replen = strlen(repl)) > 0) if ((Affix->replen = strlen(repl)) > 0)
Affix->repl = pstrdup(repl); Affix->repl = cpstrdup(Conf, repl);
else else
Affix->repl = VoidString; Affix->repl = VoidString;
Conf->naffixes++; Conf->naffixes++;
...@@ -843,8 +900,9 @@ MergeAffix(IspellDict *Conf, int a1, int a2) ...@@ -843,8 +900,9 @@ MergeAffix(IspellDict *Conf, int a1, int a2)
} }
ptr = Conf->AffixData + Conf->nAffixData; ptr = Conf->AffixData + Conf->nAffixData;
*ptr = palloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) + *ptr = cpalloc(strlen(Conf->AffixData[a1]) +
1 /* space */ + 1 /* \0 */ ); strlen(Conf->AffixData[a2]) +
1 /* space */ + 1 /* \0 */ );
sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]); sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
ptr++; ptr++;
*ptr = NULL; *ptr = NULL;
...@@ -888,7 +946,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level) ...@@ -888,7 +946,7 @@ mkSPNode(IspellDict *Conf, int low, int high, int level)
if (!nchar) if (!nchar)
return NULL; return NULL;
rs = (SPNode *) palloc0(SPNHDRSZ + nchar * sizeof(SPNodeData)); rs = (SPNode *) cpalloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
rs->length = nchar; rs->length = nchar;
data = rs->data; data = rs->data;
...@@ -982,7 +1040,7 @@ NISortDictionary(IspellDict *Conf) ...@@ -982,7 +1040,7 @@ NISortDictionary(IspellDict *Conf)
{ {
curaffix++; curaffix++;
Assert(curaffix < naffix); Assert(curaffix < naffix);
Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag); Conf->AffixData[curaffix] = cpstrdup(Conf, Conf->Spell[i]->p.flag);
} }
Conf->Spell[i]->p.d.affix = curaffix; Conf->Spell[i]->p.d.affix = curaffix;
...@@ -1020,7 +1078,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) ...@@ -1020,7 +1078,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1)); aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
naff = 0; naff = 0;
rs = (AffixNode *) palloc0(ANHRDSZ + nchar * sizeof(AffixNodeData)); rs = (AffixNode *) cpalloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
rs->length = nchar; rs->length = nchar;
data = rs->data; data = rs->data;
...@@ -1036,7 +1094,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) ...@@ -1036,7 +1094,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
if (naff) if (naff)
{ {
data->naff = naff; data->naff = naff;
data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff); data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
memcpy(data->aff, aff, sizeof(AFFIX *) * naff); memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
naff = 0; naff = 0;
} }
...@@ -1056,7 +1114,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type) ...@@ -1056,7 +1114,7 @@ mkANode(IspellDict *Conf, int low, int high, int level, int type)
if (naff) if (naff)
{ {
data->naff = naff; data->naff = naff;
data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff); data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * naff);
memcpy(data->aff, aff, sizeof(AFFIX *) * naff); memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
naff = 0; naff = 0;
} }
...@@ -1097,7 +1155,7 @@ mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix) ...@@ -1097,7 +1155,7 @@ mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
if (cnt == 0) if (cnt == 0)
return; return;
Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt); Affix->data->aff = (AFFIX **) cpalloc(sizeof(AFFIX *) * cnt);
Affix->data->naff = (uint32) cnt; Affix->data->naff = (uint32) cnt;
cnt = 0; cnt = 0;
......
...@@ -161,6 +161,10 @@ typedef struct ...@@ -161,6 +161,10 @@ typedef struct
SPELL **Spell; SPELL **Spell;
int nspell; /* number of valid entries in Spell array */ int nspell; /* number of valid entries in Spell array */
int mspell; /* allocated length of Spell array */ int mspell; /* allocated length of Spell array */
/* These are used to allocate "compact" data without palloc overhead */
char *firstfree; /* first free address (always maxaligned) */
size_t avail; /* free space remaining at firstfree */
} IspellDict; } IspellDict;
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word); extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment