]> granicus.if.org Git - postgresql/commitdiff
The ispell dictionary can now read dictionaries in the MySpell format,
authorTeodor Sigaev <teodor@sigaev.ru>
Fri, 9 Jun 2006 13:25:59 +0000 (13:25 +0000)
committerTeodor Sigaev <teodor@sigaev.ru>
Fri, 9 Jun 2006 13:25:59 +0000 (13:25 +0000)
used by OpenOffice. Dictionaries are available at
http://lingucomponent.openoffice.org/spell_dic.html
The dictionary automatically recognizes the format of the files.

Warning: MySpell's format has a limitation in its compound
word support: it is impossible to mark an affix as a
compound-only affix. So for Norwegian, German, etc.,
it is recommended to use the original ispell format.
For that reason I don't want to remove the my2ispell
scripts; they have a workaround at least for the Norwegian language.

contrib/tsearch2/ispell/spell.c
contrib/tsearch2/ispell/spell.h

index 223ae4a9ada90fc4e90ef23122b6caf12eac6644..28f38eefd32b71aaa85aeee5c5987f46686e1eb8 100644 (file)
@@ -391,6 +391,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
        char            flagflags = 0;
        FILE       *affix;
        int     line=0;
+       int     oldformat = 0;
 
        if (!(affix = fopen(filename, "r")))
                return (1);
@@ -412,6 +413,7 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
                                while (*s && t_isspace(s)) s++;
                                if ( *s && pg_mblen(s) == 1 ) 
                                        Conf->compoundcontrol = *s;
+                               oldformat++;
                                continue;
                        }
                }
@@ -419,12 +421,14 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
                {
                        suffixes = 1;
                        prefixes = 0;
+                       oldformat++;
                        continue;
                }
                if (STRNCMP(tmpstr, "prefixes") == 0)
                {
                        suffixes = 0;
                        prefixes = 1;
+                       oldformat++;
                        continue;
                }
                if (STRNCMP(tmpstr, "flag") == 0)
@@ -433,10 +437,11 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
                        flagflags = 0;
 
                        while (*s && t_isspace(s)) s++;
+                       oldformat++;
 
                        /* allow only single-encoded flags */
-                       if ( pg_mblen(s) != 1 )
-                               continue;                       
+                       if ( pg_mblen(s) != 1 ) 
+                               elog(ERROR,"Multiencoded flag at line %d: %s", line, s);
 
                        if (*s == '*')
                        {
@@ -455,12 +460,22 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
                        /* allow only single-encoded flags */
                        if ( pg_mblen(s) != 1 ) {
                                flagflags = 0;
-                               continue;
+                               elog(ERROR,"Multiencoded flag at line %d: %s", line, s);
                        }
 
                        flag = (unsigned char) *s;
                        continue;
                }
+               if ( STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 || 
+                                        STRNCMP(str, "PFX")==0 || STRNCMP(str, "SFX")==0 ) {
+
+                       if ( oldformat ) 
+                               elog(ERROR,"Wrong affix file format");
+
+                       fclose(affix);
+                       return NIImportOOAffixes(Conf, filename);
+                       
+               }
                if ((!suffixes) && (!prefixes))
                        continue;
 
@@ -475,6 +490,79 @@ NIImportAffixes(IspellDict * Conf, const char *filename)
        return (0);
 }
 
+int /* NIImportOOAffixes
+ *
+ * Reads the affix file `filename` line by line in the MySpell-style layout
+ * (COMPOUNDFLAG / PFX / SFX lines) and registers every affix rule it
+ * recognizes in `Conf` through NIAddAffix().
+ *
+ * Conf     - dictionary being filled; its compoundcontrol member is set here.
+ * filename - path of the affix file, opened with fopen(filename, "r").
+ *
+ * Returns 1 if the file cannot be opened, 0 once the whole file was read.
+ * Lines that do not match the expected layout are skipped silently.
+ */
+NIImportOOAffixes(IspellDict * Conf, const char *filename) {
+       char            str[BUFSIZ];
+       char            type[BUFSIZ];
+       char            sflag[BUFSIZ];
+       char            mask[BUFSIZ];
+       char            find[BUFSIZ];
+       char            repl[BUFSIZ];
+       bool            isSuffix = false;
+       int                     flag = 0; /* flag of the current header line; 0 = no header seen yet */
+       char            flagflags = 0;
+       FILE       *affix;
+       int     line=0;
+       int     scanread = 0;
+       char            scanbuf[BUFSIZ];
+       /* Build the sscanf format "%6s %Ns %Ns %Ns %Ns" with N = BUFSIZ/5, so every field read is width-limited. */
+       sprintf(scanbuf,"%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ/5, BUFSIZ/5, BUFSIZ/5, BUFSIZ/5);
+
+       if (!(affix = fopen(filename, "r")))
+               return (1);
+       Conf->compoundcontrol = '\t'; /* value kept unless a COMPOUNDFLAG line overrides it below */
+
+       while (fgets(str, sizeof(str), affix))
+       {
+               line++;
+               if ( *str == '\0' || t_isspace(str) || t_iseq(str,'#') ) /* skip empty lines and, presumably, lines starting with whitespace or '#' -- confirm t_isspace/t_iseq test the first character */
+                       continue;
+               pg_verifymbstr( str, strlen(str), false); /* NOTE(review): if this reports invalid input by raising an error, `affix` is not closed on that path -- confirm */
+               /* COMPOUNDFLAG line: take the first non-space character after the keyword, but only if it is a single-byte character. */
+               if ( STRNCMP(str, "COMPOUNDFLAG")==0 ) {
+                       char *s = str+strlen("COMPOUNDFLAG");
+                       while (*s && t_isspace(s)) s++;
+                       if ( *s && pg_mblen(s) == 1 ) 
+                               Conf->compoundcontrol = *s;
+                       continue;
+               }
+               /* Split the line into up to five whitespace-separated fields: type, sflag, find, repl, mask. */
+               scanread = sscanf(str, scanbuf, type, sflag, find, repl, mask);
+               /* Ignore lines with fewer than four fields or whose type is neither "sfx" nor "pfx" (presumably STRNCMP is a prefix compare -- confirm). */
+               lowerstr(type);
+               if ( scanread<4 || (STRNCMP(type,"sfx") && STRNCMP(type,"pfx")) )
+                       continue;
+               /* Exactly four fields: a header line that starts a new flag group. */
+               if ( scanread == 4 ) {
+                       if ( strlen(sflag) != 1 ) /* only one-character flags are accepted */
+                               continue;
+                       flag = *sflag;
+                       isSuffix = (STRNCMP(type,"sfx")==0) ? true : false;
+                       lowerstr(find); /* on a header line the third field lands in `find` */
+                       if ( t_iseq(find,'y') )
+                               flagflags |= FF_CROSSPRODUCT;
+                       else
+                               flagflags = 0;
+               } else { /* five fields: one rule belonging to the current flag group */
+                       if ( strlen(sflag) != 1 || flag != *sflag || flag==0 ) /* rule must carry the flag of the last accepted header */
+                               continue;
+                       lowerstr(repl);
+                       lowerstr(find);
+                       lowerstr(mask);
+                       if ( t_iseq(find,'0') ) /* a find/repl field matching '0' is stored as the empty string */
+                               *find = '\0';
+                       if ( t_iseq(repl,'0') )
+                               *repl = '\0';
+
+                       NIAddAffix(Conf, flag, flagflags, mask, find, repl, isSuffix ? FF_SUFFIX : FF_PREFIX);
+               }
+       }
+
+       fclose(affix);
+
+       return 0;
+}
+
 static int
 MergeAffix(IspellDict * Conf, int a1, int a2)
 {
index fc3240a1d8b5d525c364f2d8ac5465c5cd11ed10..fe79888bf3ec00b6fd09e0d1b7c6d634edc50c0e 100644 (file)
@@ -121,6 +121,7 @@ typedef struct
 
 TSLexeme   *NINormalizeWord(IspellDict * Conf, char *word);
 int                    NIImportAffixes(IspellDict * Conf, const char *filename);
+int                    NIImportOOAffixes(IspellDict * Conf, const char *filename);
 int                    NIImportDictionary(IspellDict * Conf, const char *filename);
 
 int                    NIAddSpell(IspellDict * Conf, const char *word, const char *flag);