]> granicus.if.org Git - postgresql/blob - src/backend/tsearch/spell.c
Improve error reporting for problems in text search configuration files
[postgresql] / src / backend / tsearch / spell.c
1 /*-------------------------------------------------------------------------
2  *
3  * spell.c
4  *              Normalizing word with ISpell
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        $PostgreSQL: pgsql/src/backend/tsearch/spell.c,v 1.12 2008/06/18 20:55:42 tgl Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16
17 #include "tsearch/dicts/spell.h"
18 #include "tsearch/ts_locale.h"
19 #include "utils/memutils.h"
20
21
/*
 * Initialization requires a lot of memory that's not needed
 * after the initialization is done.  In the init functions,
 * CurrentMemoryContext is a long-lived memory context associated
 * with the dictionary cache entry, so we use a temporary context
 * (tmpCtx, set up by checkTmpCtx) for the short-lived stuff.
 */
static MemoryContext tmpCtx = NULL;

/* Allocate sz bytes in tmpCtx; tmpalloc0 is the zero-filling variant. */
#define tmpalloc(sz)  MemoryContextAlloc(tmpCtx, (sz))
#define tmpalloc0(sz)  MemoryContextAllocZero(tmpCtx, (sz))
33
34 static void
35 checkTmpCtx(void)
36 {
37         /*
38          * XXX: This assumes that CurrentMemoryContext doesn't have any children
39          * other than the one we create here.
40          */
41         if (CurrentMemoryContext->firstchild == NULL)
42         {
43                 tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
44                                                                            "Ispell dictionary init context",
45                                                                            ALLOCSET_DEFAULT_MINSIZE,
46                                                                            ALLOCSET_DEFAULT_INITSIZE,
47                                                                            ALLOCSET_DEFAULT_MAXSIZE);
48         }
49         else
50                 tmpCtx = CurrentMemoryContext->firstchild;
51 }
52
53 static char *
54 lowerstr_ctx(char *src)
55 {
56         MemoryContext saveCtx;
57         char       *dst;
58
59         saveCtx = MemoryContextSwitchTo(tmpCtx);
60         dst = lowerstr(src);
61         MemoryContextSwitchTo(saveCtx);
62
63         return dst;
64 }
65
#define MAX_NORM 1024
#define MAXNORMLEN 256

/* Compare only the first strlen(p) bytes of s against p; 0 means s starts with p. */
#define STRNCMP(s,p)    strncmp( (s), (p), strlen(p) )
/*
 * Fetch one byte of the L-byte string W: byte N counted from the front when
 * T is FF_PREFIX, otherwise byte N counted back from the last byte.
 */
#define GETWCHAR(W,L,N,T) ( ((uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
/* GETWCHAR applied to the repl string (length replen) of affix A. */
#define GETCHAR(A,N,T)    GETWCHAR( (A)->repl, (A)->replen, N, T )

/* Shared empty string, stored in place of empty affix strings instead of a fresh copy. */
static char *VoidString = "";
74
75 static int
76 cmpspell(const void *s1, const void *s2)
77 {
78         return (strcmp((*(const SPELL **) s1)->word, (*(const SPELL **) s2)->word));
79 }
80 static int
81 cmpspellaffix(const void *s1, const void *s2)
82 {
83         return (strncmp((*(const SPELL **) s1)->p.flag, (*(const SPELL **) s2)->p.flag, MAXFLAGLEN));
84 }
85
/*
 * Return a pointer to the first character of str that t_iseq() matches
 * against c, or NULL if the end of the string is reached first.  The scan
 * advances one (possibly multibyte) character at a time via pg_mblen().
 */
static char *
findchar(char *str, int c)
{
	char	   *cur;

	for (cur = str; *cur != '\0'; cur += pg_mblen(cur))
	{
		if (t_iseq(cur, c))
			return cur;
	}

	return NULL;
}
98
99
/*
 * Backward string compare for suffix tree operations.
 *
 * Compares s1 and s2 byte by byte starting from their last bytes and moving
 * toward the front.  Returns -1, 0 or 1.  When one string is a proper suffix
 * of the other, the shorter string sorts first.
 */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
	int			i1 = strlen((const char *) s1) - 1;
	int			i2 = strlen((const char *) s2) - 1;

	for (; i1 >= 0 && i2 >= 0; i1--, i2--)
	{
		if (s1[i1] != s2[i2])
			return (s1[i1] < s2[i2]) ? -1 : 1;
	}

	/* one string ran out: whichever still has bytes left is the greater */
	if (i1 == i2)
		return 0;

	return (i1 < i2) ? -1 : 1;
}
/*
 * Like strbcmp, but compares at most count bytes, counted from the ends of
 * the strings.  Returns 0 as soon as count bytes have matched; otherwise
 * the result follows the same rules as strbcmp.
 */
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
	int			i1 = strlen((const char *) s1) - 1;
	int			i2 = strlen((const char *) s2) - 1;
	int			remaining = count;

	for (; i1 >= 0 && i2 >= 0 && remaining > 0; i1--, i2--, remaining--)
	{
		if (s1[i1] != s2[i2])
			return (s1[i1] < s2[i2]) ? -1 : 1;
	}

	/* every requested byte matched, or both strings ended together */
	if (remaining == 0 || i1 == i2)
		return 0;

	return (i1 < i2) ? -1 : 1;
}
148
149 static int
150 cmpaffix(const void *s1, const void *s2)
151 {
152         const AFFIX *a1 = (const AFFIX *) s1;
153         const AFFIX *a2 = (const AFFIX *) s2;
154
155         if (a1->type < a2->type)
156                 return -1;
157         if (a1->type > a2->type)
158                 return 1;
159         if (a1->type == FF_PREFIX)
160                 return strcmp(a1->repl, a2->repl);
161         else
162                 return strbcmp((const unsigned char *) a1->repl,
163                                            (const unsigned char *) a2->repl);
164 }
165
166 static void
167 NIAddSpell(IspellDict *Conf, const char *word, const char *flag)
168 {
169         if (Conf->nspell >= Conf->mspell)
170         {
171                 if (Conf->mspell)
172                 {
173                         Conf->mspell += 1024 * 20;
174                         Conf->Spell = (SPELL **) repalloc(Conf->Spell, Conf->mspell * sizeof(SPELL *));
175                 }
176                 else
177                 {
178                         Conf->mspell = 1024 * 20;
179                         Conf->Spell = (SPELL **) tmpalloc(Conf->mspell * sizeof(SPELL *));
180                 }
181         }
182         Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + 1);
183         strcpy(Conf->Spell[Conf->nspell]->word, word);
184         strncpy(Conf->Spell[Conf->nspell]->p.flag, flag, MAXFLAGLEN);
185         Conf->nspell++;
186 }
187
188 /*
189  * import dictionary
190  *
191  * Note caller must already have applied get_tsearch_config_filename
192  */
193 void
194 NIImportDictionary(IspellDict *Conf, const char *filename)
195 {
196         tsearch_readline_state trst;
197         char       *line;
198
199         checkTmpCtx();
200
201         if (!tsearch_readline_begin(&trst, filename))
202                 ereport(ERROR,
203                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
204                                  errmsg("could not open dictionary file \"%s\": %m",
205                                                 filename)));
206
207         while ((line = tsearch_readline(&trst)) != NULL)
208         {
209                 char       *s,
210                                    *pstr;
211                 const char *flag;
212
213                 /* Extract flag from the line */
214                 flag = NULL;
215                 if ((s = findchar(line, '/')))
216                 {
217                         *s++ = '\0';
218                         flag = s;
219                         while (*s)
220                         {
221                                 /* we allow only single encoded flags for faster works */
222                                 if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
223                                         s++;
224                                 else
225                                 {
226                                         *s = '\0';
227                                         break;
228                                 }
229                         }
230                 }
231                 else
232                         flag = "";
233
234                 /* Remove trailing spaces */
235                 s = line;
236                 while (*s)
237                 {
238                         if (t_isspace(s))
239                         {
240                                 *s = '\0';
241                                 break;
242                         }
243                         s += pg_mblen(s);
244                 }
245                 pstr = lowerstr_ctx(line);
246
247                 NIAddSpell(Conf, pstr, flag);
248                 pfree(pstr);
249
250                 pfree(line);
251         }
252         tsearch_readline_end(&trst);
253 }
254
255
256 static int
257 FindWord(IspellDict *Conf, const char *word, int affixflag, int flag)
258 {
259         SPNode     *node = Conf->Dictionary;
260         SPNodeData *StopLow,
261                            *StopHigh,
262                            *StopMiddle;
263         uint8      *ptr = (uint8 *) word;
264
265         flag &= FF_DICTFLAGMASK;
266
267         while (node && *ptr)
268         {
269                 StopLow = node->data;
270                 StopHigh = node->data + node->length;
271                 while (StopLow < StopHigh)
272                 {
273                         StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
274                         if (StopMiddle->val == *ptr)
275                         {
276                                 if (*(ptr + 1) == '\0' && StopMiddle->isword)
277                                 {
278                                         if (flag == 0)
279                                         {
280                                                 if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
281                                                         return 0;
282                                         }
283                                         else if ((flag & StopMiddle->compoundflag) == 0)
284                                                 return 0;
285
286                                         if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
287                                                 return 1;
288                                 }
289                                 node = StopMiddle->node;
290                                 ptr++;
291                                 break;
292                         }
293                         else if (StopMiddle->val < *ptr)
294                                 StopLow = StopMiddle + 1;
295                         else
296                                 StopHigh = StopMiddle;
297                 }
298                 if (StopLow >= StopHigh)
299                         break;
300         }
301         return 0;
302 }
303
304 static void
305 NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const char *find, const char *repl, int type)
306 {
307         AFFIX      *Affix;
308
309         if (Conf->naffixes >= Conf->maffixes)
310         {
311                 if (Conf->maffixes)
312                 {
313                         Conf->maffixes += 16;
314                         Conf->Affix = (AFFIX *) repalloc((void *) Conf->Affix, Conf->maffixes * sizeof(AFFIX));
315                 }
316                 else
317                 {
318                         Conf->maffixes = 16;
319                         Conf->Affix = (AFFIX *) palloc(Conf->maffixes * sizeof(AFFIX));
320                 }
321         }
322
323         Affix = Conf->Affix + Conf->naffixes;
324
325         if (strcmp(mask, ".") == 0)
326         {
327                 Affix->issimple = 1;
328                 Affix->isregis = 0;
329         }
330         else if (RS_isRegis(mask))
331         {
332                 Affix->issimple = 0;
333                 Affix->isregis = 1;
334                 RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX) ? true : false,
335                                    (mask && *mask) ? mask : VoidString);
336         }
337         else
338         {
339                 int                     masklen;
340                 int                     wmasklen;
341                 int                     err;
342                 pg_wchar   *wmask;
343                 char       *tmask;
344
345                 Affix->issimple = 0;
346                 Affix->isregis = 0;
347                 tmask = (char *) tmpalloc(strlen(mask) + 3);
348                 if (type == FF_SUFFIX)
349                         sprintf(tmask, "%s$", mask);
350                 else
351                         sprintf(tmask, "^%s", mask);
352
353                 masklen = strlen(tmask);
354                 wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
355                 wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
356
357                 err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen, REG_ADVANCED | REG_NOSUB);
358                 if (err)
359                 {
360                         char            errstr[100];
361
362                         pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
363                         ereport(ERROR,
364                                         (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
365                                          errmsg("invalid regular expression: %s", errstr)));
366                 }
367         }
368
369         Affix->flagflags = flagflags;
370         if ((Affix->flagflags & FF_COMPOUNDONLY) || (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
371         {
372                 if ((Affix->flagflags & FF_COMPOUNDFLAG) == 0)
373                         Affix->flagflags |= FF_COMPOUNDFLAG;
374         }
375         Affix->flag = flag;
376         Affix->type = type;
377
378         Affix->find = (find && *find) ? pstrdup(find) : VoidString;
379         if ((Affix->replen = strlen(repl)) > 0)
380                 Affix->repl = pstrdup(repl);
381         else
382                 Affix->repl = VoidString;
383         Conf->naffixes++;
384 }
385
386 #define PAE_WAIT_MASK   0
387 #define PAE_INMASK      1
388 #define PAE_WAIT_FIND   2
389 #define PAE_INFIND      3
390 #define PAE_WAIT_REPL   4
391 #define PAE_INREPL      5
392
393 static bool
394 parse_affentry(char *str, char *mask, char *find, char *repl)
395 {
396         int                     state = PAE_WAIT_MASK;
397         char       *pmask = mask,
398                            *pfind = find,
399                            *prepl = repl;
400
401         *mask = *find = *repl = '\0';
402
403         while (*str)
404         {
405                 if (state == PAE_WAIT_MASK)
406                 {
407                         if (t_iseq(str, '#'))
408                                 return false;
409                         else if (!t_isspace(str))
410                         {
411                                 COPYCHAR(pmask, str);
412                                 pmask += pg_mblen(str);
413                                 state = PAE_INMASK;
414                         }
415                 }
416                 else if (state == PAE_INMASK)
417                 {
418                         if (t_iseq(str, '>'))
419                         {
420                                 *pmask = '\0';
421                                 state = PAE_WAIT_FIND;
422                         }
423                         else if (!t_isspace(str))
424                         {
425                                 COPYCHAR(pmask, str);
426                                 pmask += pg_mblen(str);
427                         }
428                 }
429                 else if (state == PAE_WAIT_FIND)
430                 {
431                         if (t_iseq(str, '-'))
432                         {
433                                 state = PAE_INFIND;
434                         }
435                         else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ )
436                         {
437                                 COPYCHAR(prepl, str);
438                                 prepl += pg_mblen(str);
439                                 state = PAE_INREPL;
440                         }
441                         else if (!t_isspace(str))
442                                 ereport(ERROR,
443                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
444                                                  errmsg("syntax error")));
445                 }
446                 else if (state == PAE_INFIND)
447                 {
448                         if (t_iseq(str, ','))
449                         {
450                                 *pfind = '\0';
451                                 state = PAE_WAIT_REPL;
452                         }
453                         else if (t_isalpha(str))
454                         {
455                                 COPYCHAR(pfind, str);
456                                 pfind += pg_mblen(str);
457                         }
458                         else if (!t_isspace(str))
459                                 ereport(ERROR,
460                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
461                                                  errmsg("syntax error")));
462                 }
463                 else if (state == PAE_WAIT_REPL)
464                 {
465                         if (t_iseq(str, '-'))
466                         {
467                                 break;                  /* void repl */
468                         }
469                         else if (t_isalpha(str))
470                         {
471                                 COPYCHAR(prepl, str);
472                                 prepl += pg_mblen(str);
473                                 state = PAE_INREPL;
474                         }
475                         else if (!t_isspace(str))
476                                 ereport(ERROR,
477                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
478                                                  errmsg("syntax error")));
479                 }
480                 else if (state == PAE_INREPL)
481                 {
482                         if (t_iseq(str, '#'))
483                         {
484                                 *prepl = '\0';
485                                 break;
486                         }
487                         else if (t_isalpha(str))
488                         {
489                                 COPYCHAR(prepl, str);
490                                 prepl += pg_mblen(str);
491                         }
492                         else if (!t_isspace(str))
493                                 ereport(ERROR,
494                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
495                                                  errmsg("syntax error")));
496                 }
497                 else
498                         elog(ERROR, "unrecognized state in parse_affentry: %d", state);
499
500                 str += pg_mblen(str);
501         }
502
503         *pmask = *pfind = *prepl = '\0';
504
505         return (*mask && (*find || *repl)) ? true : false;
506 }
507
508 static void
509 addFlagValue(IspellDict *Conf, char *s, uint32 val)
510 {
511         while (*s && t_isspace(s))
512                 s++;
513
514         if (!*s)
515                 ereport(ERROR,
516                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
517                                  errmsg("syntax error")));
518
519         if (pg_mblen(s) != 1)
520                 ereport(ERROR,
521                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
522                                  errmsg("multibyte flag character is not allowed")));
523
524         Conf->flagval[(unsigned int) *s] = (unsigned char) val;
525         Conf->usecompound = true;
526 }
527
528 static void
529 NIImportOOAffixes(IspellDict *Conf, const char *filename)
530 {
531         char            type[BUFSIZ],
532                            *ptype = NULL;
533         char            sflag[BUFSIZ];
534         char            mask[BUFSIZ],
535                            *pmask;
536         char            find[BUFSIZ],
537                            *pfind;
538         char            repl[BUFSIZ],
539                            *prepl;
540         bool            isSuffix = false;
541         int                     flag = 0;
542         char            flagflags = 0;
543         tsearch_readline_state trst;
544         int                     scanread = 0;
545         char            scanbuf[BUFSIZ];
546         char       *recoded;
547
548         checkTmpCtx();
549
550         /* read file to find any flag */
551         memset(Conf->flagval, 0, sizeof(Conf->flagval));
552         Conf->usecompound = false;
553
554         if (!tsearch_readline_begin(&trst, filename))
555                 ereport(ERROR,
556                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
557                                  errmsg("could not open affix file \"%s\": %m",
558                                                 filename)));
559
560         while ((recoded = tsearch_readline(&trst)) != NULL)
561         {
562                 if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
563                 {
564                         pfree(recoded);
565                         continue;
566                 }
567
568                 if (STRNCMP(recoded, "COMPOUNDFLAG") == 0)
569                         addFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
570                                                  FF_COMPOUNDFLAG);
571                 else if (STRNCMP(recoded, "COMPOUNDBEGIN") == 0)
572                         addFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
573                                                  FF_COMPOUNDBEGIN);
574                 else if (STRNCMP(recoded, "COMPOUNDLAST") == 0)
575                         addFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
576                                                  FF_COMPOUNDLAST);
577                 /* COMPOUNDLAST and COMPOUNDEND are synonyms */
578                 else if (STRNCMP(recoded, "COMPOUNDEND") == 0)
579                         addFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
580                                                  FF_COMPOUNDLAST);
581                 else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == 0)
582                         addFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
583                                                  FF_COMPOUNDMIDDLE);
584                 else if (STRNCMP(recoded, "ONLYINCOMPOUND") == 0)
585                         addFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
586                                                  FF_COMPOUNDONLY);
587                 else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == 0)
588                         addFlagValue(Conf, recoded + strlen("COMPOUNDPERMITFLAG"),
589                                                  FF_COMPOUNDPERMITFLAG);
590                 else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == 0)
591                         addFlagValue(Conf, recoded + strlen("COMPOUNDFORBIDFLAG"),
592                                                  FF_COMPOUNDFORBIDFLAG);
593                 else if (STRNCMP(recoded, "FLAG") == 0)
594                 {
595                         char       *s = recoded + strlen("FLAG");
596
597                         while (*s && t_isspace(s))
598                                 s++;
599
600                         if (*s && STRNCMP(s, "default") != 0)
601                                 ereport(ERROR,
602                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
603                                                  errmsg("Ispell dictionary supports only default flag value")));
604                 }
605
606                 pfree(recoded);
607         }
608         tsearch_readline_end(&trst);
609
610         sprintf(scanbuf, "%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5);
611
612         if (!tsearch_readline_begin(&trst, filename))
613                 ereport(ERROR,
614                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
615                                  errmsg("could not open affix file \"%s\": %m",
616                                                 filename)));
617
618         while ((recoded = tsearch_readline(&trst)) != NULL)
619         {
620                 if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
621                         goto nextline;
622
623                 scanread = sscanf(recoded, scanbuf, type, sflag, find, repl, mask);
624
625                 if (ptype)
626                         pfree(ptype);
627                 ptype = lowerstr_ctx(type);
628                 if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
629                         goto nextline;
630
631                 if (scanread == 4)
632                 {
633                         if (strlen(sflag) != 1)
634                                 goto nextline;
635                         flag = *sflag;
636                         isSuffix = (STRNCMP(ptype, "sfx") == 0) ? true : false;
637                         if (t_iseq(find, 'y') || t_iseq(find, 'Y'))
638                                 flagflags = FF_CROSSPRODUCT;
639                         else
640                                 flagflags = 0;
641                 }
642                 else
643                 {
644                         char       *ptr;
645                         int                     aflg = 0;
646
647                         if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
648                                 goto nextline;
649                         prepl = lowerstr_ctx(repl);
650                         /* affix flag */
651                         if ((ptr = strchr(prepl, '/')) != NULL)
652                         {
653                                 *ptr = '\0';
654                                 ptr = repl + (ptr - prepl) + 1;
655                                 while (*ptr)
656                                 {
657                                         aflg |= Conf->flagval[(unsigned int) *ptr];
658                                         ptr++;
659                                 }
660                         }
661                         pfind = lowerstr_ctx(find);
662                         pmask = lowerstr_ctx(mask);
663                         if (t_iseq(find, '0'))
664                                 *pfind = '\0';
665                         if (t_iseq(repl, '0'))
666                                 *prepl = '\0';
667
668                         NIAddAffix(Conf, flag, flagflags | aflg, pmask, pfind, prepl,
669                                            isSuffix ? FF_SUFFIX : FF_PREFIX);
670                         pfree(prepl);
671                         pfree(pfind);
672                         pfree(pmask);
673                 }
674
675 nextline:
676                 pfree(recoded);
677         }
678
679         tsearch_readline_end(&trst);
680         if (ptype)
681                 pfree(ptype);
682 }
683
684 /*
685  * import affixes
686  *
687  * Note caller must already have applied get_tsearch_config_filename
688  */
689 void
690 NIImportAffixes(IspellDict *Conf, const char *filename)
691 {
692         char       *pstr = NULL;
693         char            mask[BUFSIZ];
694         char            find[BUFSIZ];
695         char            repl[BUFSIZ];
696         char       *s;
697         bool            suffixes = false;
698         bool            prefixes = false;
699         int                     flag = 0;
700         char            flagflags = 0;
701         tsearch_readline_state trst;
702         bool            oldformat = false;
703         char       *recoded = NULL;
704
705         checkTmpCtx();
706
707         if (!tsearch_readline_begin(&trst, filename))
708                 ereport(ERROR,
709                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
710                                  errmsg("could not open affix file \"%s\": %m",
711                                                 filename)));
712
713         memset(Conf->flagval, 0, sizeof(Conf->flagval));
714         Conf->usecompound = false;
715
716         while ((recoded = tsearch_readline(&trst)) != NULL)
717         {
718                 pstr = lowerstr(recoded);
719
720                 /* Skip comments and empty lines */
721                 if (*pstr == '#' || *pstr == '\n')
722                         goto nextline;
723
724                 if (STRNCMP(pstr, "compoundwords") == 0)
725                 {
726                         s = findchar(pstr, 'l');
727                         if (s)
728                         {
729                                 s = recoded + (s - pstr);               /* we need non-lowercased
730                                                                                                  * string */
731                                 while (*s && !t_isspace(s))
732                                         s++;
733                                 while (*s && t_isspace(s))
734                                         s++;
735
736                                 if (*s && pg_mblen(s) == 1)
737                                 {
738                                         Conf->flagval[(unsigned int) *s] = FF_COMPOUNDFLAG;
739                                         Conf->usecompound = true;
740                                 }
741                                 oldformat = true;
742                                 goto nextline;
743                         }
744                 }
745                 if (STRNCMP(pstr, "suffixes") == 0)
746                 {
747                         suffixes = true;
748                         prefixes = false;
749                         oldformat = true;
750                         goto nextline;
751                 }
752                 if (STRNCMP(pstr, "prefixes") == 0)
753                 {
754                         suffixes = false;
755                         prefixes = true;
756                         oldformat = true;
757                         goto nextline;
758                 }
759                 if (STRNCMP(pstr, "flag") == 0)
760                 {
761                         s = recoded + 4;        /* we need non-lowercased string */
762                         flagflags = 0;
763
764                         while (*s && t_isspace(s))
765                                 s++;
766                         oldformat = true;
767
768                         /* allow only single-encoded flags */
769                         if (pg_mblen(s) != 1)
770                                 ereport(ERROR,
771                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
772                                                  errmsg("multibyte flag character is not allowed")));
773
774                         if (*s == '*')
775                         {
776                                 flagflags |= FF_CROSSPRODUCT;
777                                 s++;
778                         }
779                         else if (*s == '~')
780                         {
781                                 flagflags |= FF_COMPOUNDONLY;
782                                 s++;
783                         }
784
785                         if (*s == '\\')
786                                 s++;
787
788                         /* allow only single-encoded flags */
789                         if (pg_mblen(s) != 1)
790                                 ereport(ERROR,
791                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
792                                                  errmsg("multibyte flag character is not allowed")));
793
794                         flag = (unsigned char) *s;
795                         goto nextline;
796                 }
797                 if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 || STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
798                         STRNCMP(recoded, "PFX") == 0 || STRNCMP(recoded, "SFX") == 0)
799                 {
800                         if (oldformat)
801                                 ereport(ERROR,
802                                                 (errcode(ERRCODE_CONFIG_FILE_ERROR),
803                                                  errmsg("wrong affix file format for flag")));
804                         tsearch_readline_end(&trst);
805                         NIImportOOAffixes(Conf, filename);
806                         return;
807                 }
808                 if ((!suffixes) && (!prefixes))
809                         goto nextline;
810
811                 if (!parse_affentry(pstr, mask, find, repl))
812                         goto nextline;
813
814                 NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
815
816 nextline:
817                 pfree(recoded);
818                 pfree(pstr);
819         }
820         tsearch_readline_end(&trst);
821 }
822
823 static int
824 MergeAffix(IspellDict *Conf, int a1, int a2)
825 {
826         char      **ptr;
827
828         while (Conf->nAffixData + 1 >= Conf->lenAffixData)
829         {
830                 Conf->lenAffixData *= 2;
831                 Conf->AffixData = (char **) repalloc(Conf->AffixData,
832                                                                                 sizeof(char *) * Conf->lenAffixData);
833         }
834
835         ptr = Conf->AffixData + Conf->nAffixData;
836         *ptr = palloc(strlen(Conf->AffixData[a1]) + strlen(Conf->AffixData[a2]) +
837                                   1 /* space */ + 1 /* \0 */ );
838         sprintf(*ptr, "%s %s", Conf->AffixData[a1], Conf->AffixData[a2]);
839         ptr++;
840         *ptr = NULL;
841         Conf->nAffixData++;
842
843         return Conf->nAffixData - 1;
844 }
845
846 static uint32
847 makeCompoundFlags(IspellDict *Conf, int affix)
848 {
849         uint32          flag = 0;
850         char       *str = Conf->AffixData[affix];
851
852         while (str && *str)
853         {
854                 flag |= Conf->flagval[(unsigned int) *str];
855                 str++;
856         }
857
858         return (flag & FF_DICTFLAGMASK);
859 }
860
861 static SPNode *
862 mkSPNode(IspellDict *Conf, int low, int high, int level)
863 {
864         int                     i;
865         int                     nchar = 0;
866         char            lastchar = '\0';
867         SPNode     *rs;
868         SPNodeData *data;
869         int                     lownew = low;
870
871         for (i = low; i < high; i++)
872                 if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
873                 {
874                         nchar++;
875                         lastchar = Conf->Spell[i]->word[level];
876                 }
877
878         if (!nchar)
879                 return NULL;
880
881         rs = (SPNode *) palloc0(SPNHDRSZ + nchar * sizeof(SPNodeData));
882         rs->length = nchar;
883         data = rs->data;
884
885         lastchar = '\0';
886         for (i = low; i < high; i++)
887                 if (Conf->Spell[i]->p.d.len > level)
888                 {
889                         if (lastchar != Conf->Spell[i]->word[level])
890                         {
891                                 if (lastchar)
892                                 {
893                                         data->node = mkSPNode(Conf, lownew, i, level + 1);
894                                         lownew = i;
895                                         data++;
896                                 }
897                                 lastchar = Conf->Spell[i]->word[level];
898                         }
899                         data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
900                         if (Conf->Spell[i]->p.d.len == level + 1)
901                         {
902                                 bool            clearCompoundOnly = false;
903
904                                 if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
905                                 {
906                                         /*
907                                          * MergeAffix called a few times. If one of word is
908                                          * allowed to be in compound word and another isn't, then
909                                          * clear FF_COMPOUNDONLY flag.
910                                          */
911
912                                         clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
913                                                 & makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
914                                                 ? false : true;
915                                         data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
916                                 }
917                                 else
918                                         data->affix = Conf->Spell[i]->p.d.affix;
919                                 data->isword = 1;
920
921                                 data->compoundflag = makeCompoundFlags(Conf, data->affix);
922
923                                 if ((data->compoundflag & FF_COMPOUNDONLY) &&
924                                         (data->compoundflag & FF_COMPOUNDFLAG) == 0)
925                                         data->compoundflag |= FF_COMPOUNDFLAG;
926
927                                 if (clearCompoundOnly)
928                                         data->compoundflag &= ~FF_COMPOUNDONLY;
929                         }
930                 }
931
932         data->node = mkSPNode(Conf, lownew, high, level + 1);
933
934         return rs;
935 }
936
937 /*
938  * Builds the Conf->Dictionary tree and AffixData from the imported dictionary
939  * and affixes.
940  */
941 void
942 NISortDictionary(IspellDict *Conf)
943 {
944         int                     i;
945         int                     naffix = 0;
946         int                     curaffix;
947
948         checkTmpCtx();
949
950         /* compress affixes */
951
952         /* Count the number of different flags used in the dictionary */
953
954         qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspellaffix);
955
956         naffix = 0;
957         for (i = 0; i < Conf->nspell; i++)
958         {
959                 if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->Spell[i - 1]->p.flag, MAXFLAGLEN))
960                         naffix++;
961         }
962
963         /*
964          * Fill in Conf->AffixData with the affixes that were used in the
965          * dictionary. Replace textual flag-field of Conf->Spell entries with
966          * indexes into Conf->AffixData array.
967          */
968         Conf->AffixData = (char **) palloc0(naffix * sizeof(char *));
969
970         curaffix = -1;
971         for (i = 0; i < Conf->nspell; i++)
972         {
973                 if (i == 0 || strncmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix], MAXFLAGLEN))
974                 {
975                         curaffix++;
976                         Assert(curaffix < naffix);
977                         Conf->AffixData[curaffix] = pstrdup(Conf->Spell[i]->p.flag);
978                 }
979
980                 Conf->Spell[i]->p.d.affix = curaffix;
981                 Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
982         }
983
984         Conf->lenAffixData = Conf->nAffixData = naffix;
985
986         qsort((void *) Conf->Spell, Conf->nspell, sizeof(SPELL *), cmpspell);
987         Conf->Dictionary = mkSPNode(Conf, 0, Conf->nspell, 0);
988
989         Conf->Spell = NULL;
990 }
991
992 static AffixNode *
993 mkANode(IspellDict *Conf, int low, int high, int level, int type)
994 {
995         int                     i;
996         int                     nchar = 0;
997         uint8           lastchar = '\0';
998         AffixNode  *rs;
999         AffixNodeData *data;
1000         int                     lownew = low;
1001         int                     naff;
1002         AFFIX     **aff;
1003
1004         for (i = low; i < high; i++)
1005                 if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
1006                 {
1007                         nchar++;
1008                         lastchar = GETCHAR(Conf->Affix + i, level, type);
1009                 }
1010
1011         if (!nchar)
1012                 return NULL;
1013
1014         aff = (AFFIX **) tmpalloc(sizeof(AFFIX *) * (high - low + 1));
1015         naff = 0;
1016
1017         rs = (AffixNode *) palloc0(ANHRDSZ + nchar * sizeof(AffixNodeData));
1018         rs->length = nchar;
1019         data = rs->data;
1020
1021         lastchar = '\0';
1022         for (i = low; i < high; i++)
1023                 if (Conf->Affix[i].replen > level)
1024                 {
1025                         if (lastchar != GETCHAR(Conf->Affix + i, level, type))
1026                         {
1027                                 if (lastchar)
1028                                 {
1029                                         data->node = mkANode(Conf, lownew, i, level + 1, type);
1030                                         if (naff)
1031                                         {
1032                                                 data->naff = naff;
1033                                                 data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
1034                                                 memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
1035                                                 naff = 0;
1036                                         }
1037                                         data++;
1038                                         lownew = i;
1039                                 }
1040                                 lastchar = GETCHAR(Conf->Affix + i, level, type);
1041                         }
1042                         data->val = GETCHAR(Conf->Affix + i, level, type);
1043                         if (Conf->Affix[i].replen == level + 1)
1044                         {                                       /* affix stopped */
1045                                 aff[naff++] = Conf->Affix + i;
1046                         }
1047                 }
1048
1049         data->node = mkANode(Conf, lownew, high, level + 1, type);
1050         if (naff)
1051         {
1052                 data->naff = naff;
1053                 data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * naff);
1054                 memcpy(data->aff, aff, sizeof(AFFIX *) * naff);
1055                 naff = 0;
1056         }
1057
1058         pfree(aff);
1059
1060         return rs;
1061 }
1062
1063 static void
1064 mkVoidAffix(IspellDict *Conf, bool issuffix, int startsuffix)
1065 {
1066         int                     i,
1067                                 cnt = 0;
1068         int                     start = (issuffix) ? startsuffix : 0;
1069         int                     end = (issuffix) ? Conf->naffixes : startsuffix;
1070         AffixNode  *Affix = (AffixNode *) palloc0(ANHRDSZ + sizeof(AffixNodeData));
1071
1072         Affix->length = 1;
1073         Affix->isvoid = 1;
1074
1075         if (issuffix)
1076         {
1077                 Affix->data->node = Conf->Suffix;
1078                 Conf->Suffix = Affix;
1079         }
1080         else
1081         {
1082                 Affix->data->node = Conf->Prefix;
1083                 Conf->Prefix = Affix;
1084         }
1085
1086
1087         for (i = start; i < end; i++)
1088                 if (Conf->Affix[i].replen == 0)
1089                         cnt++;
1090
1091         if (cnt == 0)
1092                 return;
1093
1094         Affix->data->aff = (AFFIX **) palloc(sizeof(AFFIX *) * cnt);
1095         Affix->data->naff = (uint32) cnt;
1096
1097         cnt = 0;
1098         for (i = start; i < end; i++)
1099                 if (Conf->Affix[i].replen == 0)
1100                 {
1101                         Affix->data->aff[cnt] = Conf->Affix + i;
1102                         cnt++;
1103                 }
1104 }
1105
1106 static bool
1107 isAffixInUse(IspellDict *Conf, char flag)
1108 {
1109         int                     i;
1110
1111         for (i = 0; i < Conf->nAffixData; i++)
1112                 if (strchr(Conf->AffixData[i], flag) != NULL)
1113                         return true;
1114
1115         return false;
1116 }
1117
1118 void
1119 NISortAffixes(IspellDict *Conf)
1120 {
1121         AFFIX      *Affix;
1122         size_t          i;
1123         CMPDAffix  *ptr;
1124         int                     firstsuffix = Conf->naffixes;
1125
1126         checkTmpCtx();
1127
1128         if (Conf->naffixes == 0)
1129                 return;
1130
1131         if (Conf->naffixes > 1)
1132                 qsort((void *) Conf->Affix, Conf->naffixes, sizeof(AFFIX), cmpaffix);
1133         Conf->CompoundAffix = ptr = (CMPDAffix *) palloc(sizeof(CMPDAffix) * Conf->naffixes);
1134         ptr->affix = NULL;
1135
1136         for (i = 0; i < Conf->naffixes; i++)
1137         {
1138                 Affix = &(((AFFIX *) Conf->Affix)[i]);
1139                 if (Affix->type == FF_SUFFIX && i < firstsuffix)
1140                         firstsuffix = i;
1141
1142                 if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > 0 &&
1143                         isAffixInUse(Conf, (char) Affix->flag))
1144                 {
1145                         if (ptr == Conf->CompoundAffix ||
1146                                 ptr->issuffix != (ptr - 1)->issuffix ||
1147                                 strbncmp((const unsigned char *) (ptr - 1)->affix,
1148                                                  (const unsigned char *) Affix->repl,
1149                                                  (ptr - 1)->len))
1150                         {
1151                                 /* leave only unique and minimals suffixes */
1152                                 ptr->affix = Affix->repl;
1153                                 ptr->len = Affix->replen;
1154                                 ptr->issuffix = (Affix->type == FF_SUFFIX) ? true : false;
1155                                 ptr++;
1156                         }
1157                 }
1158         }
1159         ptr->affix = NULL;
1160         Conf->CompoundAffix = (CMPDAffix *) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) * (ptr - Conf->CompoundAffix + 1));
1161
1162         Conf->Prefix = mkANode(Conf, 0, firstsuffix, 0, FF_PREFIX);
1163         Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, 0, FF_SUFFIX);
1164         mkVoidAffix(Conf, true, firstsuffix);
1165         mkVoidAffix(Conf, false, firstsuffix);
1166 }
1167
1168 static AffixNodeData *
1169 FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
1170 {
1171         AffixNodeData *StopLow,
1172                            *StopHigh,
1173                            *StopMiddle;
1174         uint8 symbol;
1175
1176         if (node->isvoid)
1177         {                                                       /* search void affixes */
1178                 if (node->data->naff)
1179                         return node->data;
1180                 node = node->data->node;
1181         }
1182
1183         while (node && *level < wrdlen)
1184         {
1185                 StopLow = node->data;
1186                 StopHigh = node->data + node->length;
1187                 while (StopLow < StopHigh)
1188                 {
1189                         StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1190                         symbol = GETWCHAR(word, wrdlen, *level, type);
1191
1192                         if (StopMiddle->val == symbol)
1193                         {
1194                                 (*level)++;
1195                                 if (StopMiddle->naff)
1196                                         return StopMiddle;
1197                                 node = StopMiddle->node;
1198                                 break;
1199                         }
1200                         else if (StopMiddle->val < symbol)
1201                                 StopLow = StopMiddle + 1;
1202                         else
1203                                 StopHigh = StopMiddle;
1204                 }
1205                 if (StopLow >= StopHigh)
1206                         break;
1207         }
1208         return NULL;
1209 }
1210
1211 static char *
1212 CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
1213 {
1214         /*
1215          * Check compound allow flags
1216          */
1217
1218         if (flagflags == 0)
1219         {
1220                 if (Affix->flagflags & FF_COMPOUNDONLY)
1221                         return NULL;
1222         }
1223         else if (flagflags & FF_COMPOUNDBEGIN)
1224         {
1225                 if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
1226                         return NULL;
1227                 if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
1228                         if (Affix->type == FF_SUFFIX)
1229                                 return NULL;
1230         }
1231         else if (flagflags & FF_COMPOUNDMIDDLE)
1232         {
1233                 if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
1234                         (Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
1235                         return NULL;
1236         }
1237         else if (flagflags & FF_COMPOUNDLAST)
1238         {
1239                 if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
1240                         return NULL;
1241                 if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
1242                         if (Affix->type == FF_PREFIX)
1243                                 return NULL;
1244         }
1245
1246         /*
1247          * make replace pattern of affix
1248          */
1249         if (Affix->type == FF_SUFFIX)
1250         {
1251                 strcpy(newword, word);
1252                 strcpy(newword + len - Affix->replen, Affix->find);
1253                 if (baselen)                    /* store length of non-changed part of word */
1254                         *baselen = len - Affix->replen;
1255         }
1256         else
1257         {
1258                 /*
1259                  * if prefix is a all non-chaged part's length then all word contains
1260                  * only prefix and suffix, so out
1261                  */
1262                 if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
1263                         return NULL;
1264                 strcpy(newword, Affix->find);
1265                 strcat(newword, word + Affix->replen);
1266         }
1267
1268         /*
1269          * check resulting word
1270          */
1271         if (Affix->issimple)
1272                 return newword;
1273         else if (Affix->isregis)
1274         {
1275                 if (RS_execute(&(Affix->reg.regis), newword))
1276                         return newword;
1277         }
1278         else
1279         {
1280                 int                     err;
1281                 pg_wchar   *data;
1282                 size_t          data_len;
1283                 int                     newword_len;
1284
1285                 /* Convert data string to wide characters */
1286                 newword_len = strlen(newword);
1287                 data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
1288                 data_len = pg_mb2wchar_with_len(newword, data, newword_len);
1289
1290                 if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0)))
1291                 {
1292                         pfree(data);
1293                         return newword;
1294                 }
1295                 pfree(data);
1296         }
1297
1298         return NULL;
1299 }
1300
1301 static int
1302 addToResult(char **forms, char **cur, char *word)
1303 {
1304         if (cur - forms >= MAX_NORM - 1)
1305                 return 0;
1306         if (forms == cur || strcmp(word, *(cur - 1)) != 0)
1307         {
1308                 *cur = pstrdup(word);
1309                 *(cur+1) = NULL;
1310                 return 1;
1311         }
1312
1313         return 0;
1314 }
1315
1316 static char **
1317 NormalizeSubWord(IspellDict *Conf, char *word, int flag)
1318 {
1319         AffixNodeData *suffix = NULL,
1320                            *prefix = NULL;
1321         int                     slevel = 0,
1322                                 plevel = 0;
1323         int                     wrdlen = strlen(word),
1324                                 swrdlen;
1325         char      **forms;
1326         char      **cur;
1327         char            newword[2 * MAXNORMLEN] = "";
1328         char            pnewword[2 * MAXNORMLEN] = "";
1329         AffixNode  *snode = Conf->Suffix,
1330                            *pnode;
1331         int                     i,
1332                                 j;
1333
1334         if (wrdlen > MAXNORMLEN)
1335                 return NULL;
1336         cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
1337         *cur = NULL;
1338
1339
1340         /* Check that the word itself is normal form */
1341         if (FindWord(Conf, word, 0, flag))
1342         {
1343                 *cur = pstrdup(word);
1344                 cur++;
1345                 *cur = NULL;
1346         }
1347
1348         /* Find all other NORMAL forms of the 'word' (check only prefix) */
1349         pnode = Conf->Prefix;
1350         plevel = 0;
1351         while (pnode)
1352         {
1353                 prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
1354                 if (!prefix)
1355                         break;
1356                 for (j = 0; j < prefix->naff; j++)
1357                 {
1358                         if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
1359                         {
1360                                 /* prefix success */
1361                                 if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
1362                                         cur += addToResult(forms, cur, newword);
1363                         }
1364                 }
1365                 pnode = prefix->node;
1366         }
1367
1368         /*
1369          * Find all other NORMAL forms of the 'word' (check suffix and then
1370          * prefix)
1371          */
1372         while (snode)
1373         {
1374                 int                     baselen = 0;
1375
1376                 /* find possible suffix */
1377                 suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
1378                 if (!suffix)
1379                         break;
1380                 /* foreach suffix check affix */
1381                 for (i = 0; i < suffix->naff; i++)
1382                 {
1383                         if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
1384                         {
1385                                 /* suffix success */
1386                                 if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
1387                                         cur += addToResult(forms, cur, newword);
1388
1389                                 /* now we will look changed word with prefixes */
1390                                 pnode = Conf->Prefix;
1391                                 plevel = 0;
1392                                 swrdlen = strlen(newword);
1393                                 while (pnode)
1394                                 {
1395                                         prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
1396                                         if (!prefix)
1397                                                 break;
1398                                         for (j = 0; j < prefix->naff; j++)
1399                                         {
1400                                                 if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
1401                                                 {
1402                                                         /* prefix success */
1403                                                         int                     ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
1404                                                         0 : prefix->aff[j]->flag;
1405
1406                                                         if (FindWord(Conf, pnewword, ff, flag))
1407                                                                 cur += addToResult(forms, cur, pnewword);
1408                                                 }
1409                                         }
1410                                         pnode = prefix->node;
1411                                 }
1412                         }
1413                 }
1414
1415                 snode = suffix->node;
1416         }
1417
1418         if (cur == forms)
1419         {
1420                 pfree(forms);
1421                 return (NULL);
1422         }
1423         return (forms);
1424 }
1425
/*
 * One variant of splitting a word into stems; variants are chained into a
 * singly linked list through 'next'.
 */
typedef struct SplitVar SplitVar;

struct SplitVar
{
	int			nstem;			/* number of entries used in stem[] */
	int			lenstem;		/* number of entries allocated for stem[] */
	char	  **stem;			/* array of stem strings */
	SplitVar   *next;			/* next variant, or NULL */
};
1433
1434 static int
1435 CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
1436 {
1437         bool            issuffix;
1438
1439         if (CheckInPlace)
1440         {
1441                 while ((*ptr)->affix)
1442                 {
1443                         if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
1444                         {
1445                                 len = (*ptr)->len;
1446                                 issuffix = (*ptr)->issuffix;
1447                                 (*ptr)++;
1448                                 return (issuffix) ? len : 0;
1449                         }
1450                         (*ptr)++;
1451                 }
1452         }
1453         else
1454         {
1455                 char       *affbegin;
1456
1457                 while ((*ptr)->affix)
1458                 {
1459                         if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
1460                         {
1461                                 len = (*ptr)->len + (affbegin - word);
1462                                 issuffix = (*ptr)->issuffix;
1463                                 (*ptr)++;
1464                                 return (issuffix) ? len : 0;
1465                         }
1466                         (*ptr)++;
1467                 }
1468         }
1469         return -1;
1470 }
1471
1472 static SplitVar *
1473 CopyVar(SplitVar *s, int makedup)
1474 {
1475         SplitVar   *v = (SplitVar *) palloc(sizeof(SplitVar));
1476
1477         v->next = NULL;
1478         if (s)
1479         {
1480                 int                     i;
1481
1482                 v->lenstem = s->lenstem;
1483                 v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
1484                 v->nstem = s->nstem;
1485                 for (i = 0; i < s->nstem; i++)
1486                         v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
1487         }
1488         else
1489         {
1490                 v->lenstem = 16;
1491                 v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
1492                 v->nstem = 0;
1493         }
1494         return v;
1495 }
1496
1497 static void
1498 AddStem(SplitVar *v, char *word)
1499 {
1500         if ( v->nstem >= v->lenstem )
1501         {
1502                 v->lenstem *= 2;
1503                 v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
1504         }
1505
1506         v->stem[v->nstem] = word;
1507         v->nstem++;
1508 }
1509
1510 static SplitVar *
1511 SplitToVariants(IspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos)
1512 {
1513         SplitVar   *var = NULL;
1514         SPNodeData *StopLow,
1515                            *StopHigh,
1516                            *StopMiddle = NULL;
1517         SPNode     *node = (snode) ? snode : Conf->Dictionary;
1518         int                     level = (snode) ? minpos : startpos;    /* recursive
1519                                                                                                                  * minpos==level */
1520         int                     lenaff;
1521         CMPDAffix  *caff;
1522         char       *notprobed;
1523         int                     compoundflag = 0;
1524
1525         notprobed = (char *) palloc(wordlen);
1526         memset(notprobed, 1, wordlen);
1527         var = CopyVar(orig, 1);
1528
1529         while (level < wordlen)
1530         {
1531                 /* find word with epenthetic or/and compound affix */
1532                 caff = Conf->CompoundAffix;
1533                 while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
1534                 {
1535                         /*
1536                          * there is one of compound affixes, so check word for existings
1537                          */
1538                         char            buf[MAXNORMLEN];
1539                         char      **subres;
1540
1541                         lenaff = level - startpos + lenaff;
1542
1543                         if (!notprobed[startpos + lenaff - 1])
1544                                 continue;
1545
1546                         if (level + lenaff - 1 <= minpos)
1547                                 continue;
1548
1549                         if ( lenaff >= MAXNORMLEN )
1550                                 continue; /* skip too big value */
1551                         if (lenaff > 0)
1552                                 memcpy(buf, word + startpos, lenaff);
1553                         buf[lenaff] = '\0';
1554
1555                         if (level == 0)
1556                                 compoundflag = FF_COMPOUNDBEGIN;
1557                         else if (level == wordlen - 1)
1558                                 compoundflag = FF_COMPOUNDLAST;
1559                         else
1560                                 compoundflag = FF_COMPOUNDMIDDLE;
1561                         subres = NormalizeSubWord(Conf, buf, compoundflag);
1562                         if (subres)
1563                         {
1564                                 /* Yes, it was a word from dictionary */
1565                                 SplitVar   *new = CopyVar(var, 0);
1566                                 SplitVar   *ptr = var;
1567                                 char      **sptr = subres;
1568
1569                                 notprobed[startpos + lenaff - 1] = 0;
1570
1571                                 while (*sptr)
1572                                 {
1573                                         AddStem( new, *sptr ); 
1574                                         sptr++;
1575                                 }
1576                                 pfree(subres);
1577
1578                                 while (ptr->next)
1579                                         ptr = ptr->next;
1580                                 ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
1581
1582                                 pfree(new->stem);
1583                                 pfree(new);
1584                         }
1585                 }
1586
1587                 if (!node)
1588                         break;
1589
1590                 StopLow = node->data;
1591                 StopHigh = node->data + node->length;
1592                 while (StopLow < StopHigh)
1593                 {
1594                         StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1595                         if (StopMiddle->val == ((uint8 *) (word))[level])
1596                                 break;
1597                         else if (StopMiddle->val < ((uint8 *) (word))[level])
1598                                 StopLow = StopMiddle + 1;
1599                         else
1600                                 StopHigh = StopMiddle;
1601                 }
1602
1603                 if (StopLow < StopHigh)
1604                 {
1605                         if (level == FF_COMPOUNDBEGIN)
1606                                 compoundflag = FF_COMPOUNDBEGIN;
1607                         else if (level == wordlen - 1)
1608                                 compoundflag = FF_COMPOUNDLAST;
1609                         else
1610                                 compoundflag = FF_COMPOUNDMIDDLE;
1611
1612                         /* find infinitive */
1613                         if (StopMiddle->isword &&
1614                                 (StopMiddle->compoundflag & compoundflag) &&
1615                                 notprobed[level])
1616                         {
1617                                 /* ok, we found full compoundallowed word */
1618                                 if (level > minpos)
1619                                 {
1620                                         /* and its length more than minimal */
1621                                         if (wordlen == level + 1)
1622                                         {
1623                                                 /* well, it was last word */
1624                                                 AddStem( var, pnstrdup(word + startpos, wordlen - startpos) );
1625                                                 pfree(notprobed);
1626                                                 return var;
1627                                         }
1628                                         else
1629                                         {
1630                                                 /* then we will search more big word at the same point */
1631                                                 SplitVar   *ptr = var;
1632
1633                                                 while (ptr->next)
1634                                                         ptr = ptr->next;
1635                                                 ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
1636                                                 /* we can find next word */
1637                                                 level++;
1638                                                 AddStem( var, pnstrdup(word + startpos, level - startpos) );
1639                                                 node = Conf->Dictionary;
1640                                                 startpos = level;
1641                                                 continue;
1642                                         }
1643                                 }
1644                         }
1645                         node = StopMiddle->node;
1646                 }
1647                 else
1648                         node = NULL;
1649                 level++;
1650         }
1651
1652         AddStem( var, pnstrdup(word + startpos, wordlen - startpos) );
1653         pfree(notprobed);
1654         return var;
1655 }
1656
1657 static void
1658 addNorm( TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
1659 {
1660         if ( *lres == NULL ) 
1661                 *lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
1662
1663         if ( *lcur - *lres < MAX_NORM-1 ) { 
1664                 (*lcur)->lexeme = word;
1665                 (*lcur)->flags = flags;
1666                 (*lcur)->nvariant = NVariant;
1667                 (*lcur)++;
1668                 (*lcur)->lexeme = NULL;
1669         }
1670 }
1671
1672 TSLexeme *
1673 NINormalizeWord(IspellDict *Conf, char *word)
1674 {
1675         char      **res;
1676         TSLexeme   *lcur = NULL,
1677                            *lres = NULL;
1678         uint16          NVariant = 1;
1679
1680         res = NormalizeSubWord(Conf, word, 0);
1681
1682         if (res)
1683         {
1684                 char      **ptr = res;
1685
1686                 while (*ptr && (lcur-lres) < MAX_NORM)
1687                 {
1688                         addNorm( &lres, &lcur, *ptr, 0, NVariant++);
1689                         ptr++;
1690                 }
1691                 pfree(res);
1692         }
1693
1694         if (Conf->usecompound)
1695         {
1696                 int                     wordlen = strlen(word);
1697                 SplitVar   *ptr,
1698                                    *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
1699                 int                     i;
1700
1701                 while (var)
1702                 {
1703                         if (var->nstem > 1)
1704                         {
1705                                 char      **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
1706
1707                                 if (subres)
1708                                 {
1709                                         char      **subptr = subres;
1710
1711                                         while (*subptr)
1712                                         {
1713                                                 for (i = 0; i < var->nstem - 1; i++)
1714                                                 {
1715                                                         addNorm( &lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant); 
1716                                                 }
1717
1718                                                 addNorm( &lres, &lcur, *subptr, 0, NVariant); 
1719                                                 subptr++;
1720                                                 NVariant++;
1721                                         }
1722
1723                                         pfree(subres);
1724                                         var->stem[0] = NULL;
1725                                         pfree(var->stem[var->nstem - 1]);
1726                                 }
1727                         }
1728
1729                         for (i = 0; i < var->nstem && var->stem[i]; i++)
1730                                 pfree(var->stem[i]);
1731                         ptr = var->next;
1732                         pfree(var->stem);
1733                         pfree(var);
1734                         var = ptr;
1735                 }
1736         }
1737
1738         return lres;
1739 }