]> granicus.if.org Git - postgresql/blob - src/backend/tsearch/spell.c
Fix core dump with buffer-overrun by too long infinitive. Add checking of using
[postgresql] / src / backend / tsearch / spell.c
1 /*-------------------------------------------------------------------------
2  *
3  * spell.c
4  *              Normalizing word with ISpell
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        $PostgreSQL: pgsql/src/backend/tsearch/spell.c,v 1.10 2008/01/16 13:01:03 teodor Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16
17 #include "storage/fd.h"
18 #include "tsearch/dicts/spell.h"
19 #include "tsearch/ts_locale.h"
20 #include "utils/memutils.h"
21
22
23 /*
24  * Initialization requires a lot of memory that's not needed
25  * after the initialization is done.  In init function,
26  * CurrentMemoryContext is a long lived memory context associated
27  * with the dictionary cache entry, so we use a temporary context
28  * for the short-lived stuff.
29  */
30 static MemoryContext tmpCtx = NULL;
31
32 #define tmpalloc(sz)  MemoryContextAlloc(tmpCtx, (sz))
33 #define tmpalloc0(sz)  MemoryContextAllocZero(tmpCtx, (sz))
34
35 static void
36 checkTmpCtx(void)
37 {
38         /*
39          * XXX: This assumes that CurrentMemoryContext doesn't have any children
40          * other than the one we create here.
41          */
42         if (CurrentMemoryContext->firstchild == NULL)
43         {
44                 tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
45                                                                            "Ispell dictionary init context",
46                                                                            ALLOCSET_DEFAULT_MINSIZE,
47                                                                            ALLOCSET_DEFAULT_INITSIZE,
48                                                                            ALLOCSET_DEFAULT_MAXSIZE);
49         }
50         else
51                 tmpCtx = CurrentMemoryContext->firstchild;
52 }
53
54 static char *
55 lowerstr_ctx(char *src)
56 {
57         MemoryContext saveCtx;
58         char       *dst;
59
60         saveCtx = MemoryContextSwitchTo(tmpCtx);
61         dst = lowerstr(src);
62         MemoryContextSwitchTo(saveCtx);
63
64         return dst;
65 }
66
67 #define MAX_NORM 1024
68 #define MAXNORMLEN 256
69
70 #define STRNCMP(s,p)    strncmp( (s), (p), strlen(p) )
71 #define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
72 #define GETCHAR(A,N,T)    GETWCHAR( (A)->repl, (A)->replen, N, T )
73
74 static char *VoidString = "";
75
76 static int
77 cmpspell(const void *s1, const void *s2)
78 {
79         return (strcmp((*(const SPELL **) s1)->word, (*(const SPELL **) s2)->word));
80 }
81 static int
82 cmpspellaffix(const void *s1, const void *s2)
83 {
84         return (strncmp((*(const SPELL **) s1)->p.flag, (*(const SPELL **) s2)->p.flag, MAXFLAGLEN));
85 }
86
87 static char *
88 findchar(char *str, int c)
89 {
90         while (*str)
91         {
92                 if (t_iseq(str, c))
93                         return str;
94                 str += pg_mblen(str);
95         }
96
97         return NULL;
98 }
99
100
/*
 * Backward string compare for suffix tree operations.
 *
 * The strings are compared byte by byte (as unsigned values) starting from
 * their last byte.  If one string is exhausted before a difference is found,
 * the shorter string sorts first.  Returns -1, 0 or 1.
 */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
	/* number of not-yet-compared bytes at the front of each string */
	size_t		rest1 = strlen((const char *) s1);
	size_t		rest2 = strlen((const char *) s2);

	while (rest1 > 0 && rest2 > 0)
	{
		unsigned char c1 = s1[--rest1];
		unsigned char c2 = s2[--rest2];

		if (c1 != c2)
			return (c1 < c2) ? -1 : 1;
	}

	if (rest1 != rest2)
		return (rest1 < rest2) ? -1 : 1;

	return 0;
}
/*
 * Backward string compare limited to the last `count` bytes.
 *
 * Works like strbcmp(), but stops and reports equality once `count` byte
 * pairs have matched.  If a string runs out before that, the shorter string
 * sorts first.  Returns -1, 0 or 1.
 */
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
	/* number of not-yet-compared bytes at the front of each string */
	size_t		rest1 = strlen((const char *) s1);
	size_t		rest2 = strlen((const char *) s2);
	int			budget = count;

	while (rest1 > 0 && rest2 > 0 && budget > 0)
	{
		unsigned char c1 = s1[--rest1];
		unsigned char c2 = s2[--rest2];

		if (c1 != c2)
			return (c1 < c2) ? -1 : 1;
		budget--;
	}

	/* all requested bytes matched */
	if (budget == 0)
		return 0;

	if (rest1 != rest2)
		return (rest1 < rest2) ? -1 : 1;

	return 0;
}
149
150 static int
151 cmpaffix(const void *s1, const void *s2)
152 {
153         const AFFIX *a1 = (const AFFIX *) s1;
154         const AFFIX *a2 = (const AFFIX *) s2;
155
156         if (a1->type < a2->type)
157                 return -1;
158         if (a1->type > a2->type)
159                 return 1;
160         if (a1->type == FF_PREFIX)
161                 return strcmp(a1->repl, a2->repl);
162         else
163                 return strbcmp((const unsigned char *) a1->repl,
164                                            (const unsigned char *) a2->repl);
165 }
166
167 static void
168 NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
169 {
170         if (Conf->nspell >= Conf->mspell)
171         {
172                 if (Conf->mspell)
173                 {
174                         Conf->mspell += 1024 * 20;
175                         Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
176                 }
177                 else
178                 {
179                         Conf->mspell = 1024 * 20;
180                         Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
181                 }
182         }
183         Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
184         strcpy(Conf->Spell[Conf->nspell]->word, word);
185         strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, MAXFLAGLEN);
186         Conf->nspell++;
187 }
188
189 /*
190  * import dictionary
191  *
192  * Note caller must already have applied get_tsearch_config_filename
193  */
194 void
195 NIImportDictionary(IspellDict *Conf, const char *filename)
196 {
197         FILE       *dict;
198         char       *line;
199
200         checkTmpCtx();
201
202         if (!(dict = AllocateFile(filename, "r")))
203                 ereport(ERROR,
204                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
205                                  errmsg("could not open dictionary file \"%s\": %m",
206                                                 filename)));
207
208         while ((line = t_readline(dict)) != NULL)
209         {
210                 char       *s,
211                                    *pstr;
212                 const char *flag;
213
214                 /* Extract flag from the line */
215                 flag = NULL;
216                 if ((s = findchar(line, '/')))
217                 {
218                         *s++ = '\0';
219                         flag = s;
220                         while (*s)
221                         {
222                                 /* we allow only single encoded flags for faster works */
223                                 if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
224                                         s++;
225                                 else
226                                 {
227                                         *s = '\0';
228                                         break;
229                                 }
230                         }
231                 }
232                 else
233                         flag = "";
234
235                 /* Remove trailing spaces */
236                 s = line;
237                 while (*s)
238                 {
239                         if (t_isspace(s))
240                         {
241                                 *s = '\0';
242                                 break;
243                         }
244                         s += pg_mblen(s);
245                 }
246                 pstr = lowerstr_ctx(line);
247
248                 NIAddSpell(Conf, pstr, flag);
249                 pfree(pstr);
250
251                 pfree(line);
252         }
253         FreeFile(dict);
254 }
255
256
257 static int
258 FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
259 {
260         SPNode     *node = Conf->Dictionary;
261         SPNodeData *StopLow,
262                            *StopHigh,
263                            *StopMiddle;
264         uint8      *ptr = (uint8 *) word;
265
266         flag &= FF_DICTFLAGMASK;
267
268         while (node && *ptr)
269         {
270                 StopLow = node->data;
271                 StopHigh = node->data + node->length;
272                 while (StopLow < StopHigh)
273                 {
274                         StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
275                         if (StopMiddle->val == *ptr)
276                         {
277                                 if (*(ptr + 1) == '\0' && StopMiddle->isword)
278                                 {
279                                         if (flag == 0)
280                                         {
281                                                 if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
282                                                         return 0;
283                                         }
284                                         else if ((flag & StopMiddle->compoundflag) == 0)
285                                                 return 0;
286
287                                         if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
288                                                 return 1;
289                                 }
290                                 node = StopMiddle->node;
291                                 ptr++;
292                                 break;
293                         }
294                         else if (StopMiddle->val < *ptr)
295                                 StopLow = StopMiddle + 1;
296                         else
297                                 StopHigh = StopMiddle;
298                 }
299                 if (StopLow >= StopHigh)
300                         break;
301         }
302         return 0;
303 }
304
305 static void
306 NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type)
307 {
308         AFFIX      *Affix;
309
310         if (Conf->naffixes >= Conf->maffixes)
311         {
312                 if (Conf->maffixes)
313                 {
314                         Conf->maffixes += 16;
315                         Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
316                 }
317                 else
318                 {
319                         Conf->maffixes = 16;
320                         Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
321                 }
322         }
323
324         Affix = Conf->Affix + Conf->naffixes;
325
326         if (strcmp(mask, ".") == 0)
327         {
328                 Affix->issimple = 1;
329                 Affix->isregis = 0;
330         }
331         else if (RS_isRegis(mask))
332         {
333                 Affix->issimple = 0;
334                 Affix->isregis = 1;
335                 RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX) ? true : false,
336                                    (char *) ((mask && *mask) ? mask : VoidString));
337         }
338         else
339         {
340                 int                     masklen;
341                 int                     wmasklen;
342                 int                     err;
343                 pg_wchar   *wmask;
344                 char       *tmask;
345
346                 Affix->issimple = 0;
347                 Affix->isregis = 0;
348                 tmask = (char *) tmpalloc(strlen(mask) + 3);
349                 if (type == FF_SUFFIX)
350                         sprintf(tmask, "%s$", mask);
351                 else
352                         sprintf(tmask, "^%s", mask);
353
354                 masklen = strlen(tmask);
355                 wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
356                 wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
357
358                 err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen, REG_ADVANCED | REG_NOSUB);
359                 if (err)
360                 {
361                         char            errstr[100];
362
363                         pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
364                         ereport(ERROR,
365                                         (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
366                                          errmsg("invalid regular expression: %s", errstr)));
367                 }
368         }
369
370         Affix->flagflags = flagflags;
371         if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
372         {
373                 if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
374                         Affix->flagflags |= FF_COMPOUNDFLAG;
375         }
376         Affix->flag = flag;
377         Affix->type = type;
378
379         Affix->find = (find && *find) ? pstrdup(find) : VoidString;
380         if ((Affix->replen = strlen(repl)) > 0)
381                 Affix->repl = pstrdup(repl);
382         else
383                 Affix->repl = VoidString;
384         Conf->naffixes++;
385 }
386
387 #define PAE_WAIT_MASK   0
388 #define PAE_INMASK      1
389 #define PAE_WAIT_FIND   2
390 #define PAE_INFIND      3
391 #define PAE_WAIT_REPL   4
392 #define PAE_INREPL      5
393
394 static bool
395 parse_affentry(char *str, char *mask, char *find, char *repl,
396                            const char *filename, int lineno)
397 {
398         int                     state = PAE_WAIT_MASK;
399         char       *pmask = mask,
400                            *pfind = find,
401                            *prepl = repl;
402
403         *mask = *find = *repl = '\0';
404
405         while (*str)
406         {
407                 if (state == PAE_WAIT_MASK)
408                 {
409                         if (t_iseq(str, '#'))
410                                 return false;
411                         else if (!t_isspace(str))
412                         {
413                                 COPYCHAR(pmask, str);
414                                 pmask += pg_mblen(str);
415                                 state = PAE_INMASK;
416                         }
417                 }
418                 else if (state == PAE_INMASK)
419                 {
420                         if (t_iseq(str, '>'))
421                         {
422                                 *pmask = '\0';
423                                 state = PAE_WAIT_FIND;
424                         }
425                         else if (!t_isspace(str))
426                         {
427                                 COPYCHAR(pmask, str);
428                                 pmask += pg_mblen(str);
429                         }
430                 }
431                 else if (state == PAE_WAIT_FIND)
432                 {
433                         if (t_iseq(str, '-'))
434                         {
435                                 state = PAE_INFIND;
436                         }
437                         else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
438                         {
439                                 COPYCHAR(prepl, str);
440                                 prepl += pg_mblen(str);
441                                 state = PAE_INREPL;
442                         }
443                         else if (!t_isspace(str))
444                                 ereport(ERROR,
445                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
446                                            errmsg("syntax error at line %d of affix file \"%s\"",
447                                                           lineno, filename)));
448                 }
449                 else if (state == PAE_INFIND)
450                 {
451                         if (t_iseq(str, ','))
452                         {
453                                 *pfind = '\0';
454                                 state = PAE_WAIT_REPL;
455                         }
456                         else if (t_isalpha(str))
457                         {
458                                 COPYCHAR(pfind, str);
459                                 pfind += pg_mblen(str);
460                         }
461                         else if (!t_isspace(str))
462                                 ereport(ERROR,
463                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
464                                            errmsg("syntax error at line %d of affix file \"%s\"",
465                                                           lineno, filename)));
466                 }
467                 else if (state == PAE_WAIT_REPL)
468                 {
469                         if (t_iseq(str, '-'))
470                         {
471                                 break;                  /* void repl */
472                         }
473                         else if (t_isalpha(str))
474                         {
475                                 COPYCHAR(prepl, str);
476                                 prepl += pg_mblen(str);
477                                 state = PAE_INREPL;
478                         }
479                         else if (!t_isspace(str))
480                                 ereport(ERROR,
481                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
482                                            errmsg("syntax error at line %d of affix file \"%s\"",
483                                                           lineno, filename)));
484                 }
485                 else if (state == PAE_INREPL)
486                 {
487                         if (t_iseq(str, '#'))
488                         {
489                                 *prepl = '\0';
490                                 break;
491                         }
492                         else if (t_isalpha(str))
493                         {
494                                 COPYCHAR(prepl, str);
495                                 prepl += pg_mblen(str);
496                         }
497                         else if (!t_isspace(str))
498                                 ereport(ERROR,
499                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
500                                            errmsg("syntax error at line %d of affix file \"%s\"",
501                                                           lineno, filename)));
502                 }
503                 else
504                         elog(ERROR, "unrecognized state in parse_affentry: %d", state);
505
506                 str += pg_mblen(str);
507         }
508
509         *pmask = *pfind = *prepl = '\0';
510
511         return (*mask && (*find || *repl)) ? true : false;
512 }
513
514 static void
515 addFlagValue(IspellDict *Conf, char *s, uint32 val,
516                          const char *filename, int lineno)
517 {
518         while (*s && t_isspace(s))
519                 s++;
520
521         if (!*s)
522                 ereport(ERROR,
523                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
524                                  errmsg("syntax error at line %d of affix file \"%s\"",
525                                                 lineno, filename)));
526
527         if (pg_mblen(s) != 1)
528                 ereport(ERROR,
529                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
530                                  errmsg("multibyte flag character is not allowed at line %d of affix file \"%s\"",
531                                                 lineno, filename)));
532
533         Conf->flagval[(unsigned int) *s] = (unsigned char) val;
534         Conf->usecompound = true;
535 }
536
537 static void
538 NIImportOOAffixes(IspellDict *Conf, const char *filename)
539 {
540         char            type[BUFSIZ],
541                            *ptype = NULL;
542         char            sflag[BUFSIZ];
543         char            mask[BUFSIZ],
544                            *pmask;
545         char            find[BUFSIZ],
546                            *pfind;
547         char            repl[BUFSIZ],
548                            *prepl;
549         bool            isSuffix = false;
550         int                     flag = 0;
551         char            flagflags = 0;
552         FILE       *affix;
553         int                     lineno = 0;
554         int                     scanread = 0;
555         char            scanbuf[BUFSIZ];
556         char       *recoded;
557
558         checkTmpCtx();
559
560         /* read file to find any flag */
561         memset(Conf->flagval, 0, sizeof(Conf->flagval));
562         Conf->usecompound = false;
563
564         if (!(affix = AllocateFile(filename, "r")))
565                 ereport(ERROR,
566                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
567                                  errmsg("could not open affix file \"%s\": %m",
568                                                 filename)));
569
570         while ((recoded = t_readline(affix)) != NULL)
571         {
572                 lineno++;
573
574                 if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
575                 {
576                         pfree(recoded);
577                         continue;
578                 }
579
580                 if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
581                         addFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
582                                                  FF_COMPOUNDFLAG, filename, lineno);
583                 else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
584                         addFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
585                                                  FF_COMPOUNDBEGIN, filename, lineno);
586                 else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
587                         addFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
588                                                  FF_COMPOUNDLAST, filename, lineno);
589                 /* COMPOUNDLAST and COMPOUNDEND are synonyms */
590                 else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
591                         addFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
592                                                  FF_COMPOUNDLAST, filename, lineno);
593                 else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
594                         addFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
595                                                  FF_COMPOUNDMIDDLE, filename, lineno);
596                 else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
597                         addFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
598                                                  FF_COMPOUNDONLY, filename, lineno);
599                 else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
600                         addFlagValue(Conf, recoded + strlen("COMPOUNDPERMITFLAG"),
601                                                  FF_COMPOUNDPERMITFLAG, filename, lineno);
602                 else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
603                         addFlagValue(Conf, recoded + strlen("COMPOUNDFORBIDFLAG"),
604                                                  FF_COMPOUNDFORBIDFLAG, filename, lineno);
605                 else if (STRNCMP(recoded, "FLAG") == 0)
606                 {
607                         char       *s = recoded + strlen("FLAG");
608
609                         while (*s && t_isspace(s))
610                                 s++;
611
612                         if (*s && STRNCMP(s, "default") != 0)
613                                 ereport(ERROR,
614                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
615                                                  errmsg("Ispell dictionary supports only default flag value at line %d of affix file \"%s\"",
616                                                                 lineno, filename)));
617                 }
618
619                 pfree(recoded);
620         }
621         FreeFile(affix);
622         lineno = 0;
623
624         sprintf(scanbuf, "%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5);
625
626         if (!(affix = AllocateFile(filename, "r")))
627                 ereport(ERROR,
628                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
629                                  errmsg("could not open affix file \"%s\": %m",
630                                                 filename)));
631
632         while ((recoded = t_readline(affix)) != NULL)
633         {
634                 lineno++;
635                 if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
636                         goto nextline;
637
638                 scanread = sscanf(recoded, scanbuf, type, sflag, find, repl, mask);
639
640                 if (ptype)
641                         pfree(ptype);
642                 ptype = lowerstr_ctx(type);
643                 if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
644                         goto nextline;
645
646                 if (scanread == 4)
647                 {
648                         if (strlen(sflag) != 1)
649                                 goto nextline;
650                         flag = *sflag;
651                         isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false;
652                         if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
653                                 flagflags = FF_CROSSPRODUCT;
654                         else
655                                 flagflags = 0;
656                 }
657                 else
658                 {
659                         char       *ptr;
660                         int                     aflg = 0;
661
662                         if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
663                                 goto nextline;
664                         prepl = lowerstr_ctx(repl);
665                         /* affix flag */
666                         if ((ptr = strchr(prepl, '/')) != NULL)
667                         {
668                                 *ptr = '\0';
669                                 ptr = repl + (ptr - prepl) + 1;
670                                 while (*ptr)
671                                 {
672                                         aflg |= Conf->flagval[(unsigned int) *ptr];
673                                         ptr++;
674                                 }
675                         }
676                         pfind = lowerstr_ctx(find);
677                         pmask = lowerstr_ctx(mask);
678                         if (t_iseq(find, '0'))
679                                 *pfind = '\0';
680                         if (t_iseq(repl, '0'))
681                                 *prepl = '\0';
682
683                         NIAddAffix(Conf, flag, flagflags | aflg, pmask, pfind, prepl,
684                                            isSuffix ? FF_SUFFIX : FF_PREFIX);
685                         pfree(prepl);
686                         pfree(pfind);
687                         pfree(pmask);
688                 }
689
690 nextline:
691                 pfree(recoded);
692         }
693
694         if (ptype)
695                 pfree(ptype);
696         FreeFile(affix);
697 }
698
699 /*
700  * import affixes
701  *
702  * Note caller must already have applied get_tsearch_config_filename
703  */
704 void
705 NIImportAffixes(IspellDict *Conf, const char *filename)
706 {
707         char       *pstr = NULL;
708         char            mask[BUFSIZ];
709         char            find[BUFSIZ];
710         char            repl[BUFSIZ];
711         char       *s;
712         bool            suffixes = false;
713         bool            prefixes = false;
714         int                     flag = 0;
715         char            flagflags = 0;
716         FILE       *affix;
717         int                     lineno = 0;
718         bool            oldformat = false;
719         char       *recoded = NULL;
720
721         checkTmpCtx();
722
723         if (!(affix = AllocateFile(filename, "r")))
724                 ereport(ERROR,
725                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
726                                  errmsg("could not open affix file \"%s\": %m",
727                                                 filename)));
728
729         memset(Conf->flagval, 0, sizeof(Conf->flagval));
730         Conf->usecompound = false;
731
732         while ((recoded = t_readline(affix)) != NULL)
733         {
734                 pstr = lowerstr(recoded);
735
736                 lineno++;
737
738                 /* Skip comments and empty lines */
739                 if (*pstr == '#' || *pstr == '\n')
740                         goto nextline;
741
742                 if (STRNCMP(pstr, "compoundwords") == 0)
743                 {
744                         s = findchar(pstr, 'l');
745                         if (s)
746                         {
747                                 s = recoded + (s - pstr);               /* we need non-lowercased
748                                                                                                  * string */
749                                 while (*s && !t_isspace(s))
750                                         s++;
751                                 while (*s && t_isspace(s))
752                                         s++;
753
754                                 if (*s && pg_mblen(s) == 1)
755                                 {
756                                         Conf->flagval[(unsigned int) *s] = FF_COMPOUNDFLAG;
757                                         Conf->usecompound = true;
758                                 }
759                                 oldformat = true;
760                                 goto nextline;
761                         }
762                 }
763                 if (STRNCMP(pstr, "suffixes") == 0)
764                 {
765                         suffixes = true;
766                         prefixes = false;
767                         oldformat = true;
768                         goto nextline;
769                 }
770                 if (STRNCMP(pstr, "prefixes") == 0)
771                 {
772                         suffixes = false;
773                         prefixes = true;
774                         oldformat = true;
775                         goto nextline;
776                 }
777                 if (STRNCMP(pstr, "flag") == 0)
778                 {
779                         s = recoded + 4;        /* we need non-lowercased string */
780                         flagflags = 0;
781
782                         while (*s && t_isspace(s))
783                                 s++;
784                         oldformat = true;
785
786                         /* allow only single-encoded flags */
787                         if (pg_mblen(s) != 1)
788                                 ereport(ERROR,
789                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
790                                                  errmsg("multibyte flag character is not allowed at line %d of affix file \"%s\"",
791                                                                 lineno, filename)));
792
793                         if (*s == '*')
794                         {
795                                 flagflags |= FF_CROSSPRODUCT;
796                                 s++;
797                         }
798                         else if (*s == '~')
799                         {
800                                 flagflags |= FF_COMPOUNDONLY;
801                                 s++;
802                         }
803
804                         if (*s == '\\')
805                                 s++;
806
807                         /* allow only single-encoded flags */
808                         if (pg_mblen(s) != 1)
809                                 ereport(ERROR,
810                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
811                                                  errmsg("multibyte flag character is not allowed at line %d of affix file \"%s\"",
812                                                                 lineno, filename)));
813
814                         flag = (unsigned char) *s;
815                         goto nextline;
816                 }
817                 if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 || STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
818                         STRNCMP(recoded, "PFX") == 0 || STRNCMP(recoded, "SFX") == 0)
819                 {
820                         if (oldformat)
821                                 ereport(ERROR,
822                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
823                                                  errmsg("wrong affix file format for flag at line %d of affix file \"%s\"",
824                                                                 lineno, filename)));
825                         FreeFile(affix);
826                         NIImportOOAffixes(Conf, filename);
827                         return;
828                 }
829                 if ((!suffixes) && (!prefixes))
830                         goto nextline;
831
832                 if (!parse_affentry(pstr, mask, find, repl, filename, lineno))
833                         goto nextline;
834
835                 NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
836
837 nextline:
838                 pfree(recoded);
839                 pfree(pstr);
840         }
841         FreeFile(affix);
842 }
843
844 static int
845 MergeAffix(IspellDict *Conf, int a1, int a2)
846 {
847         char      **ptr;
848
849         while (Conf->nAffixData + 1 >= Conf->lenAffixData)
850         {
851                 Conf->lenAffixData *= 2;
852                 Conf->AffixData = (char **) repalloc(Conf->AffixData,
853                                                                                 sizeof(char *) * Conf->lenAffixData);
854         }
855
856         ptr = Conf->AffixData + Conf->nAffixData;
857         *ptr = palloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) +
858                                   1 /* space */ + 1 /* \0 */ );
859         sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
860         ptr++;
861         *ptr = NULL;
862         Conf->nAffixData++;
863
864         return Conf->nAffixData - 1;
865 }
866
867 static uint32
868 makeCompoundFlags(IspellDict *Conf, int affix)
869 {
870         uint32          flag = 0;
871         char       *str = Conf->AffixData[affix];
872
873         while (str && *str)
874         {
875                 flag |= Conf->flagval[(unsigned int) *str];
876                 str++;
877         }
878
879         return (flag & FF_DICTFLAGMASK);
880 }
881
882 static SPNode *
883 mkSPNode(IspellDict *Conf, int low, int high, int level)
884 {
885         int                     i;
886         int                     nchar = 0;
887         char            lastchar = '\0';
888         SPNode     *rs;
889         SPNodeData *data;
890         int                     lownew = low;
891
892         for (i = low; i < high; i++)
893                 if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
894                 {
895                         nchar++;
896                         lastchar = Conf->Spell[i]->word[level];
897                 }
898
899         if (!nchar)
900                 return NULL;
901
902         rs = (SPNode *) palloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
903         rs->length = nchar;
904         data = rs->data;
905
906         lastchar = '\0';
907         for (i = low; i < high; i++)
908                 if (Conf->Spell[i]->p.d.len > level)
909                 {
910                         if (lastchar != Conf->Spell[i]->word[level])
911                         {
912                                 if (lastchar)
913                                 {
914                                         data->node = mkSPNode(Conf, lownew, i, level + 1);
915                                         lownew = i;
916                                         data++;
917                                 }
918                                 lastchar = Conf->Spell[i]->word[level];
919                         }
920                         data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
921                         if (Conf->Spell[i]->p.d.len == level + 1)
922                         {
923                                 bool            clearCompoundOnly = false;
924
925                                 if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
926                                 {
927                                         /*
928                                          * MergeAffix called a few times. If one of word is
929                                          * allowed to be in compound word and another isn't, then
930                                          * clear FF_COMPOUNDONLY flag.
931                                          */
932
933                                         clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
934                                                 & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
935                                                 ? false : true;
936                                         data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
937                                 }
938                                 else
939                                         data->affix = Conf->Spell[i]->p.d.affix;
940                                 data->isword = 1;
941
942                                 data->compoundflag = makeCompoundFlags(Conf, data->affix);
943
944                                 if ((data->compoundflag & FF_COMPOUNDONLY) &&
945                                         (data->compoundflag & FF_COMPOUNDFLAG) == 0)
946                                         data->compoundflag |= FF_COMPOUNDFLAG;
947
948                                 if (clearCompoundOnly)
949                                         data->compoundflag &= ~FF_COMPOUNDONLY;
950                         }
951                 }
952
953         data->node = mkSPNode(Conf, lownew, high, level + 1);
954
955         return rs;
956 }
957
958 /*
959  * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
960  * and affixes.
961  */
962 void
963 NISortDictionary(IspellDict *Conf)
964 {
965         int                     i;
966         int                     naffix = 0;
967         int                     curaffix;
968
969         checkTmpCtx();
970
971         /* compress affixes */
972
973         /* Count the number of different flags used in the dictionary */
974
975         qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);
976
977         naffix = 0;
978         for (i = 0; i < Conf->nspell; i++)
979         {
980                 if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag, MAXFLAGLEN))
981                         naffix++;
982         }
983
984         /*
985          * Fill in Conf->AffixData with the affixes that were used in the
986          * dictionary. Replace textual flag-field of Conf->Spell entries with
987          * indexes into Conf->AffixData array.
988          */
989         Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
990
991         curaffix = -1;
992         for (i = 0; i < Conf->nspell; i++)
993         {
994                 if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix], MAXFLAGLEN))
995                 {
996                         curaffix++;
997                         Assert(curaffix < naffix);
998                         Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag);
999                 }
1000
1001                 Conf->Spell[i]->p.d.affix = curaffix;
1002                 Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
1003         }
1004
1005         Conf->lenAffixData = Conf->nAffixData = naffix;
1006
1007         qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
1008         Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
1009
1010         Conf->Spell = NULL;
1011 }
1012
1013 static AffixNode *
1014 mkANode(IspellDict *Conf, int low, int high, int level, int type)
1015 {
1016         int                     i;
1017         int                     nchar = 0;
1018         uint8           lastchar = '\0';
1019         AffixNode  *rs;
1020         AffixNodeData *data;
1021         int                     lownew = low;
1022         int                     naff;
1023         AFFIX     **aff;
1024
1025         for (i = low; i < high; i++)
1026                 if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
1027                 {
1028                         nchar++;
1029                         lastchar = GETCHAR(Conf->Affix + i, level, type);
1030                 }
1031
1032         if (!nchar)
1033                 return NULL;
1034
1035         aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
1036         naff = 0;
1037
1038         rs = (AffixNode *) palloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
1039         rs->length = nchar;
1040         data = rs->data;
1041
1042         lastchar = '\0';
1043         for (i = low; i < high; i++)
1044                 if (Conf->Affix[i].replen > level)
1045                 {
1046                         if (lastchar != GETCHAR(Conf->Affix + i, level, type))
1047                         {
1048                                 if (lastchar)
1049                                 {
1050                                         data->node = mkANode(Conf, lownew, i, level + 1, type);
1051                                         if (naff)
1052                                         {
1053                                                 data->naff = naff;
1054                                                 data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
1055                                                 memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
1056                                                 naff = 0;
1057                                         }
1058                                         data++;
1059                                         lownew = i;
1060                                 }
1061                                 lastchar = GETCHAR(Conf->Affix + i, level, type);
1062                         }
1063                         data->val = GETCHAR(Conf->Affix + i, level, type);
1064                         if (Conf->Affix[i].replen == level + 1)
1065                         {                                       /* affix stopped */
1066                                 aff[naff++] = Conf->Affix + i;
1067                         }
1068                 }
1069
1070         data->node = mkANode(Conf, lownew, high, level + 1, type);
1071         if (naff)
1072         {
1073                 data->naff = naff;
1074                 data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
1075                 memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
1076                 naff = 0;
1077         }
1078
1079         pfree(aff);
1080
1081         return rs;
1082 }
1083
1084 static void
1085 mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
1086 {
1087         int                     i,
1088                                 cnt = 0;
1089         int                     start = (issuffix) ? startsuffix : 0;
1090         int                     end = (issuffix) ? Conf->naffixes : startsuffix;
1091         AffixNode  *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
1092
1093         Affix->length = 1;
1094         Affix->isvoid = 1;
1095
1096         if (issuffix)
1097         {
1098                 Affix->data->node = Conf->Suffix;
1099                 Conf->Suffix = Affix;
1100         }
1101         else
1102         {
1103                 Affix->data->node = Conf->Prefix;
1104                 Conf->Prefix = Affix;
1105         }
1106
1107
1108         for (i = start; i < end; i++)
1109                 if (Conf->Affix[i].replen == 0)
1110                         cnt++;
1111
1112         if (cnt == 0)
1113                 return;
1114
1115         Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt);
1116         Affix->data->naff = (uint32) cnt;
1117
1118         cnt = 0;
1119         for (i = start; i < end; i++)
1120                 if (Conf->Affix[i].replen == 0)
1121                 {
1122                         Affix->data->aff[cnt] = Conf->Affix + i;
1123                         cnt++;
1124                 }
1125 }
1126
1127 static bool
1128 isAffixInUse(IspellDict *Conf, char flag)
1129 {
1130         int                     i;
1131
1132         for (i = 0; i < Conf->nAffixData; i++)
1133                 if (strchr(Conf->AffixData[i], flag) != NULL)
1134                         return true;
1135
1136         return false;
1137 }
1138
1139 void
1140 NISortAffixes(IspellDict *Conf)
1141 {
1142         AFFIX      *Affix;
1143         size_t          i;
1144         CMPDAffix  *ptr;
1145         int                     firstsuffix = Conf->naffixes;
1146
1147         checkTmpCtx();
1148
1149         if (Conf->naffixes == 0)
1150                 return;
1151
1152         if (Conf->naffixes > 1)
1153                 qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
1154         Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
1155         ptr->affix = NULL;
1156
1157         for (i = 0; i < Conf->naffixes; i++)
1158         {
1159                 Affix = &(((AFFIX *) Conf->Affix)[i]);
1160                 if (Affix->type == FF_SUFFIX && i < firstsuffix)
1161                         firstsuffix = i;
1162
1163                 if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
1164                         isAffixInUse(Conf, (char) Affix->flag))
1165                 {
1166                         if (ptr == Conf->CompoundAffix ||
1167                                 ptr->issuffix != (ptr - 1)->issuffix ||
1168                                 strbncmp((const unsigned char *) (ptr - 1)->affix,
1169                                                  (const unsigned char *) Affix->repl,
1170                                                  (ptr - 1)->len))
1171                         {
1172                                 /* leave only unique and minimals suffixes */
1173                                 ptr->affix = Affix->repl;
1174                                 ptr->len = Affix->replen;
1175                                 ptr->issuffix = (Affix->type == FF_SUFFIX) ? true : false;
1176                                 ptr++;
1177                         }
1178                 }
1179         }
1180         ptr->affix = NULL;
1181         Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
1182
1183         Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
1184         Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
1185         mkVoidAffix(Conf, true, firstsuffix);
1186         mkVoidAffix(Conf, false, firstsuffix);
1187 }
1188
1189 static AffixNodeData *
1190 FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
1191 {
1192         AffixNodeData *StopLow,
1193                            *StopHigh,
1194                            *StopMiddle;
1195         uint8 symbol;
1196
1197         if (node->isvoid)
1198         {                                                       /* search void affixes */
1199                 if (node->data->naff)
1200                         return node->data;
1201                 node = node->data->node;
1202         }
1203
1204         while (node && *level < wrdlen)
1205         {
1206                 StopLow = node->data;
1207                 StopHigh = node->data + node->length;
1208                 while (StopLow < StopHigh)
1209                 {
1210                         StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1211                         symbol = GETWCHAR(word, wrdlen, *level, type);
1212
1213                         if (StopMiddle->val == symbol)
1214                         {
1215                                 (*level)++;
1216                                 if (StopMiddle->naff)
1217                                         return StopMiddle;
1218                                 node = StopMiddle->node;
1219                                 break;
1220                         }
1221                         else if (StopMiddle->val < symbol)
1222                                 StopLow = StopMiddle + 1;
1223                         else
1224                                 StopHigh = StopMiddle;
1225                 }
1226                 if (StopLow >= StopHigh)
1227                         break;
1228         }
1229         return NULL;
1230 }
1231
1232 static char *
1233 CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
1234 {
1235         /*
1236          * Check compound allow flags
1237          */
1238
1239         if (flagflags == 0)
1240         {
1241                 if (Affix->flagflags & FF_COMPOUNDONLY)
1242                         return NULL;
1243         }
1244         else if (flagflags & FF_COMPOUNDBEGIN)
1245         {
1246                 if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
1247                         return NULL;
1248                 if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
1249                         if (Affix->type == FF_SUFFIX)
1250                                 return NULL;
1251         }
1252         else if (flagflags & FF_COMPOUNDMIDDLE)
1253         {
1254                 if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
1255                         (Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
1256                         return NULL;
1257         }
1258         else if (flagflags & FF_COMPOUNDLAST)
1259         {
1260                 if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
1261                         return NULL;
1262                 if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
1263                         if (Affix->type == FF_PREFIX)
1264                                 return NULL;
1265         }
1266
1267         /*
1268          * make replace pattern of affix
1269          */
1270         if (Affix->type == FF_SUFFIX)
1271         {
1272                 strcpy(newword, word);
1273                 strcpy(newword + len - Affix->replen, Affix->find);
1274                 if (baselen)                    /* store length of non-changed part of word */
1275                         *baselen = len - Affix->replen;
1276         }
1277         else
1278         {
1279                 /*
1280                  * if prefix is a all non-chaged part's length then all word contains
1281                  * only prefix and suffix, so out
1282                  */
1283                 if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
1284                         return NULL;
1285                 strcpy(newword, Affix->find);
1286                 strcat(newword, word + Affix->replen);
1287         }
1288
1289         /*
1290          * check resulting word
1291          */
1292         if (Affix->issimple)
1293                 return newword;
1294         else if (Affix->isregis)
1295         {
1296                 if (RS_execute(&(Affix->reg.regis), newword))
1297                         return newword;
1298         }
1299         else
1300         {
1301                 int                     err;
1302                 pg_wchar   *data;
1303                 size_t          data_len;
1304                 int                     newword_len;
1305
1306                 /* Convert data string to wide characters */
1307                 newword_len = strlen(newword);
1308                 data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
1309                 data_len = pg_mb2wchar_with_len(newword, data, newword_len);
1310
1311                 if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0)))
1312                 {
1313                         pfree(data);
1314                         return newword;
1315                 }
1316                 pfree(data);
1317         }
1318
1319         return NULL;
1320 }
1321
1322 static int
1323 addToResult(char **forms, char **cur, char *word)
1324 {
1325         if (cur - forms >= MAX_NORM - 1)
1326                 return 0;
1327         if (forms == cur || strcmp(word, *(cur - 1)) != 0)
1328         {
1329                 *cur = pstrdup(word);
1330                 *(cur+1) = NULL;
1331                 return 1;
1332         }
1333
1334         return 0;
1335 }
1336
1337 static char **
1338 NormalizeSubWord(IspellDict *Conf, char *word, int flag)
1339 {
1340         AffixNodeData *suffix = NULL,
1341                            *prefix = NULL;
1342         int                     slevel = 0,
1343                                 plevel = 0;
1344         int                     wrdlen = strlen(word),
1345                                 swrdlen;
1346         char      **forms;
1347         char      **cur;
1348         char            newword[2 * MAXNORMLEN] = "";
1349         char            pnewword[2 * MAXNORMLEN] = "";
1350         AffixNode  *snode = Conf->Suffix,
1351                            *pnode;
1352         int                     i,
1353                                 j;
1354
1355         if (wrdlen > MAXNORMLEN)
1356                 return NULL;
1357         cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
1358         *cur = NULL;
1359
1360
1361         /* Check that the word itself is normal form */
1362         if (FindWord(Conf, word, 0, flag))
1363         {
1364                 *cur = pstrdup(word);
1365                 cur++;
1366                 *cur = NULL;
1367         }
1368
1369         /* Find all other NORMAL forms of the 'word' (check only prefix) */
1370         pnode = Conf->Prefix;
1371         plevel = 0;
1372         while (pnode)
1373         {
1374                 prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
1375                 if (!prefix)
1376                         break;
1377                 for (j = 0; j < prefix->naff; j++)
1378                 {
1379                         if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
1380                         {
1381                                 /* prefix success */
1382                                 if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
1383                                         cur += addToResult(forms, cur, newword);
1384                         }
1385                 }
1386                 pnode = prefix->node;
1387         }
1388
1389         /*
1390          * Find all other NORMAL forms of the 'word' (check suffix and then
1391          * prefix)
1392          */
1393         while (snode)
1394         {
1395                 int                     baselen = 0;
1396
1397                 /* find possible suffix */
1398                 suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
1399                 if (!suffix)
1400                         break;
1401                 /* foreach suffix check affix */
1402                 for (i = 0; i < suffix->naff; i++)
1403                 {
1404                         if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
1405                         {
1406                                 /* suffix success */
1407                                 if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
1408                                         cur += addToResult(forms, cur, newword);
1409
1410                                 /* now we will look changed word with prefixes */
1411                                 pnode = Conf->Prefix;
1412                                 plevel = 0;
1413                                 swrdlen = strlen(newword);
1414                                 while (pnode)
1415                                 {
1416                                         prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
1417                                         if (!prefix)
1418                                                 break;
1419                                         for (j = 0; j < prefix->naff; j++)
1420                                         {
1421                                                 if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
1422                                                 {
1423                                                         /* prefix success */
1424                                                         int                     ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
1425                                                         0 : prefix->aff[j]->flag;
1426
1427                                                         if (FindWord(Conf, pnewword, ff, flag))
1428                                                                 cur += addToResult(forms, cur, pnewword);
1429                                                 }
1430                                         }
1431                                         pnode = prefix->node;
1432                                 }
1433                         }
1434                 }
1435
1436                 snode = suffix->node;
1437         }
1438
1439         if (cur == forms)
1440         {
1441                 pfree(forms);
1442                 return (NULL);
1443         }
1444         return (forms);
1445 }
1446
/*
 * A growable list of stem strings; several such lists can be chained
 * through 'next'.
 */
typedef struct SplitVar
{
	int			nstem;			/* number of entries of stem[] in use */
	int			lenstem;		/* number of entries allocated for stem[] */
	char	  **stem;			/* the stem strings */
	struct SplitVar *next;		/* next list in the chain, or NULL */
} SplitVar;
1454
1455 static int
1456 CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
1457 {
1458         bool            issuffix;
1459
1460         if (CheckInPlace)
1461         {
1462                 while ((*ptr)->affix)
1463                 {
1464                         if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
1465                         {
1466                                 len = (*ptr)->len;
1467                                 issuffix = (*ptr)->issuffix;
1468                                 (*ptr)++;
1469                                 return (issuffix) ? len : 0;
1470                         }
1471                         (*ptr)++;
1472                 }
1473         }
1474         else
1475         {
1476                 char       *affbegin;
1477
1478                 while ((*ptr)->affix)
1479                 {
1480                         if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
1481                         {
1482                                 len = (*ptr)->len + (affbegin - word);
1483                                 issuffix = (*ptr)->issuffix;
1484                                 (*ptr)++;
1485                                 return (issuffix) ? len : 0;
1486                         }
1487                         (*ptr)++;
1488                 }
1489         }
1490         return -1;
1491 }
1492
1493 static SplitVar *
1494 CopyVar(SplitVar *s, int makedup)
1495 {
1496         SplitVar   *v = (SplitVar *) palloc(sizeof(SplitVar));
1497
1498         v->next = NULL;
1499         if (s)
1500         {
1501                 int                     i;
1502
1503                 v->lenstem = s->lenstem;
1504                 v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
1505                 v->nstem = s->nstem;
1506                 for (i = 0; i < s->nstem; i++)
1507                         v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
1508         }
1509         else
1510         {
1511                 v->lenstem = 16;
1512                 v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
1513                 v->nstem = 0;
1514         }
1515         return v;
1516 }
1517
1518 static void
1519 AddStem(SplitVar *v, char *word)
1520 {
1521         if ( v->nstem >= v->lenstem )
1522         {
1523                 v->lenstem *= 2;
1524                 v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
1525         }
1526
1527         v->stem[v->nstem] = word;
1528         v->nstem++;
1529 }
1530
1531 static SplitVar *
1532 SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos)
1533 {
1534         SplitVar   *var = NULL;
1535         SPNodeData *StopLow,
1536                            *StopHigh,
1537                            *StopMiddle = NULL;
1538         SPNode     *node = (snode) ? snode : Conf->Dictionary;
1539         int                     level = (snode) ? minpos : startpos;    /* recursive
1540                                                                                                                  * minpos==level */
1541         int                     lenaff;
1542         CMPDAffix  *caff;
1543         char       *notprobed;
1544         int                     compoundflag = 0;
1545
1546         notprobed = (char *) palloc(wordlen);
1547         memset(notprobed, 1, wordlen);
1548         var = CopyVar(orig, 1);
1549
1550         while (level < wordlen)
1551         {
1552                 /* find word with epenthetic or/and compound affix */
1553                 caff = Conf->CompoundAffix;
1554                 while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
1555                 {
1556                         /*
1557                          * there is one of compound affixes, so check word for existings
1558                          */
1559                         char            buf[MAXNORMLEN];
1560                         char      **subres;
1561
1562                         lenaff = level - startpos + lenaff;
1563
1564                         if (!notprobed[startpos + lenaff - 1])
1565                                 continue;
1566
1567                         if (level + lenaff - 1 <= minpos)
1568                                 continue;
1569
1570                         if ( lenaff >= MAXNORMLEN )
1571                                 continue; /* skip too big value */
1572                         if (lenaff > 0)
1573                                 memcpy(buf, word + startpos, lenaff);
1574                         buf[lenaff] = '\0';
1575
1576                         if (level == 0)
1577                                 compoundflag = FF_COMPOUNDBEGIN;
1578                         else if (level == wordlen - 1)
1579                                 compoundflag = FF_COMPOUNDLAST;
1580                         else
1581                                 compoundflag = FF_COMPOUNDMIDDLE;
1582                         subres = NormalizeSubWord(Conf, buf, compoundflag);
1583                         if (subres)
1584                         {
1585                                 /* Yes, it was a word from dictionary */
1586                                 SplitVar   *new = CopyVar(var, 0);
1587                                 SplitVar   *ptr = var;
1588                                 char      **sptr = subres;
1589
1590                                 notprobed[startpos + lenaff - 1] = 0;
1591
1592                                 while (*sptr)
1593                                 {
1594                                         AddStem( new, *sptr ); 
1595                                         sptr++;
1596                                 }
1597                                 pfree(subres);
1598
1599                                 while (ptr->next)
1600                                         ptr = ptr->next;
1601                                 ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
1602
1603                                 pfree(new->stem);
1604                                 pfree(new);
1605                         }
1606                 }
1607
1608                 if (!node)
1609                         break;
1610
1611                 StopLow = node->data;
1612                 StopHigh = node->data + node->length;
1613                 while (StopLow < StopHigh)
1614                 {
1615                         StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1616                         if (StopMiddle->val == ((uint8 *) (word))[level])
1617                                 break;
1618                         else if (StopMiddle->val < ((uint8 *) (word))[level])
1619                                 StopLow = StopMiddle + 1;
1620                         else
1621                                 StopHigh = StopMiddle;
1622                 }
1623
1624                 if (StopLow < StopHigh)
1625                 {
1626                         if (level == FF_COMPOUNDBEGIN)
1627                                 compoundflag = FF_COMPOUNDBEGIN;
1628                         else if (level == wordlen - 1)
1629                                 compoundflag = FF_COMPOUNDLAST;
1630                         else
1631                                 compoundflag = FF_COMPOUNDMIDDLE;
1632
1633                         /* find infinitive */
1634                         if (StopMiddle->isword &&
1635                                 (StopMiddle->compoundflag & compoundflag) &&
1636                                 notprobed[level])
1637                         {
1638                                 /* ok, we found full compoundallowed word */
1639                                 if (level > minpos)
1640                                 {
1641                                         /* and its length more than minimal */
1642                                         if (wordlen == level + 1)
1643                                         {
1644                                                 /* well, it was last word */
1645                                                 AddStem( var, pnstrdup(word + startpos, wordlen - startpos) );
1646                                                 pfree(notprobed);
1647                                                 return var;
1648                                         }
1649                                         else
1650                                         {
1651                                                 /* then we will search more big word at the same point */
1652                                                 SplitVar   *ptr = var;
1653
1654                                                 while (ptr->next)
1655                                                         ptr = ptr->next;
1656                                                 ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
1657                                                 /* we can find next word */
1658                                                 level++;
1659                                                 AddStem( var, pnstrdup(word + startpos, level - startpos) );
1660                                                 node = Conf->Dictionary;
1661                                                 startpos = level;
1662                                                 continue;
1663                                         }
1664                                 }
1665                         }
1666                         node = StopMiddle->node;
1667                 }
1668                 else
1669                         node = NULL;
1670                 level++;
1671         }
1672
1673         AddStem( var, pnstrdup(word + startpos, wordlen - startpos) );
1674         pfree(notprobed);
1675         return var;
1676 }
1677
1678 static void
1679 addNorm( TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
1680 {
1681         if ( *lres == NULL ) 
1682                 *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
1683
1684         if ( *lcur - *lres < MAX_NORM-1 ) { 
1685                 (*lcur)->lexeme = word;
1686                 (*lcur)->flags = flags;
1687                 (*lcur)->nvariant = NVariant;
1688                 (*lcur)++;
1689                 (*lcur)->lexeme = NULL;
1690         }
1691 }
1692
1693 TSLexeme *
1694 NINormalizeWord(IspellDict *Conf, char *word)
1695 {
1696         char      **res;
1697         TSLexeme   *lcur = NULL,
1698                            *lres = NULL;
1699         uint16          NVariant = 1;
1700
1701         res = NormalizeSubWord(Conf, word, 0);
1702
1703         if (res)
1704         {
1705                 char      **ptr = res;
1706
1707                 while (*ptr && (lcur-lres) < MAX_NORM)
1708                 {
1709                         addNorm( &lres, &lcur, *ptr, 0, NVariant++);
1710                         ptr++;
1711                 }
1712                 pfree(res);
1713         }
1714
1715         if (Conf->usecompound)
1716         {
1717                 int                     wordlen = strlen(word);
1718                 SplitVar   *ptr,
1719                                    *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
1720                 int                     i;
1721
1722                 while (var)
1723                 {
1724                         if (var->nstem > 1)
1725                         {
1726                                 char      **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
1727
1728                                 if (subres)
1729                                 {
1730                                         char      **subptr = subres;
1731
1732                                         while (*subptr)
1733                                         {
1734                                                 for (i = 0; i < var->nstem - 1; i++)
1735                                                 {
1736                                                         addNorm( &lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant); 
1737                                                 }
1738
1739                                                 addNorm( &lres, &lcur, *subptr, 0, NVariant); 
1740                                                 subptr++;
1741                                                 NVariant++;
1742                                         }
1743
1744                                         pfree(subres);
1745                                         var->stem[0] = NULL;
1746                                         pfree(var->stem[var->nstem - 1]);
1747                                 }
1748                         }
1749
1750                         for (i = 0; i < var->nstem && var->stem[i]; i++)
1751                                 pfree(var->stem[i]);
1752                         ptr = var->next;
1753                         pfree(var->stem);
1754                         pfree(var);
1755                         var = ptr;
1756                 }
1757         }
1758
1759         return lres;
1760 }