Conf->naffixes++;
}
+
+/* Parsing states for parse_affentry() and friends */
#define PAE_WAIT_MASK 0
-#define PAE_INMASK 1
+#define PAE_INMASK 1
#define PAE_WAIT_FIND 2
-#define PAE_INFIND 3
+#define PAE_INFIND 3
#define PAE_WAIT_REPL 4
-#define PAE_INREPL 5
+#define PAE_INREPL 5
+#define PAE_WAIT_TYPE 6
+#define PAE_WAIT_FLAG 7
+/*
+ * Parse next space-separated field of an .affix file line.
+ *
+ * *str is the input pointer (will be advanced past field)
+ * next is where to copy the field value to, with null termination
+ *
+ * The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
+ *
+ * Returns TRUE if we found a field, FALSE if not.
+ */
+static bool
+get_nextfield(char **str, char *next)
+{
+ int state = PAE_WAIT_MASK;
+ int avail = BUFSIZ;
+
+ while (**str)
+ {
+ if (state == PAE_WAIT_MASK)
+ {
+ if (t_iseq(*str, '#'))
+ return false;
+ else if (!t_isspace(*str))
+ {
+ int clen = pg_mblen(*str);
+
+ if (clen < avail)
+ {
+ COPYCHAR(next, *str);
+ next += clen;
+ avail -= clen;
+ }
+ state = PAE_INMASK;
+ }
+ }
+ else /* state == PAE_INMASK */
+ {
+ if (t_isspace(*str))
+ {
+ *next = '\0';
+ return true;
+ }
+ else
+ {
+ int clen = pg_mblen(*str);
+
+ if (clen < avail)
+ {
+ COPYCHAR(next, *str);
+ next += clen;
+ avail -= clen;
+ }
+ }
+ }
+ *str += pg_mblen(*str);
+ }
+
+ *next = '\0';
+
+ return (state == PAE_INMASK); /* OK if we got a nonempty field */
+}
+
+/*
+ * Parses entry of an .affix file of MySpell or Hunspell format.
+ *
+ * An .affix file entry has the following format:
+ * - header
+ * <type> <flag> <cross_flag> <flag_count>
+ * - fields after header:
+ * <type> <flag> <find> <replace> <mask>
+ *
+ * str is the input line
+ * field values are returned to type etc, which must be buffers of size BUFSIZ.
+ *
+ * Returns number of fields found; any omitted fields are set to empty strings.
+ */
+static int
+parse_ooaffentry(char *str, char *type, char *flag, char *find,
+ char *repl, char *mask)
+{
+ int state = PAE_WAIT_TYPE;
+ int fields_read = 0;
+ bool valid = false;
+
+ *type = *flag = *find = *repl = *mask = '\0';
+
+ while (*str)
+ {
+ switch (state)
+ {
+ case PAE_WAIT_TYPE:
+ valid = get_nextfield(&str, type);
+ state = PAE_WAIT_FLAG;
+ break;
+ case PAE_WAIT_FLAG:
+ valid = get_nextfield(&str, flag);
+ state = PAE_WAIT_FIND;
+ break;
+ case PAE_WAIT_FIND:
+ valid = get_nextfield(&str, find);
+ state = PAE_WAIT_REPL;
+ break;
+ case PAE_WAIT_REPL:
+ valid = get_nextfield(&str, repl);
+ state = PAE_WAIT_MASK;
+ break;
+ case PAE_WAIT_MASK:
+ valid = get_nextfield(&str, mask);
+ state = -1; /* force loop exit */
+ break;
+ default:
+ elog(ERROR, "unrecognized state in parse_ooaffentry: %d",
+ state);
+ break;
+ }
+ if (valid)
+ fields_read++;
+ else
+ break; /* early EOL */
+ if (state < 0)
+ break; /* got all fields */
+ }
+
+ return fields_read;
+}
+
+/*
+ * Parses entry of an .affix file of Ispell format
+ *
+ * An .affix file entry has the following format:
+ * <mask> > [-<find>,]<replace>
+ */
static bool
parse_affentry(char *str, char *mask, char *find, char *repl)
{
int flag = 0;
char flagflags = 0;
tsearch_readline_state trst;
- int scanread = 0;
- char scanbuf[BUFSIZ];
char *recoded;
/* read file to find any flag */
}
tsearch_readline_end(&trst);
- sprintf(scanbuf, "%%6s %%%ds %%%ds %%%ds %%%ds", BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5, BUFSIZ / 5);
-
if (!tsearch_readline_begin(&trst, filename))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
while ((recoded = tsearch_readline(&trst)) != NULL)
{
+ int fields_read;
+
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
goto nextline;
- scanread = sscanf(recoded, scanbuf, type, sflag, find, repl, mask);
+ fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
if (ptype)
pfree(ptype);
ptype = lowerstr_ctx(Conf, type);
- if (scanread < 4 || (STRNCMP(ptype, "sfx") && STRNCMP(ptype, "pfx")))
+ if (fields_read < 4 ||
+ (STRNCMP(ptype, "sfx") != 0 && STRNCMP(ptype, "pfx") != 0))
goto nextline;
- if (scanread == 4)
+ if (fields_read == 4)
{
if (strlen(sflag) != 1)
goto nextline;
if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
goto nextline;
prepl = lowerstr_ctx(Conf, repl);
- /* affix flag */
+ /* Find position of '/' in lowercased string "prepl" */
if ((ptr = strchr(prepl, '/')) != NULL)
{
+ /*
+ * Here we use non-lowercased string "repl". We need position
+ * of '/' in "repl".
+ */
*ptr = '\0';
ptr = repl + (ptr - prepl) + 1;
while (*ptr)
if (STRNCMP(pstr, "compoundwords") == 0)
{
+ /* Find position in lowercased string "pstr" */
s = findchar(pstr, 'l');
if (s)
{
- s = recoded + (s - pstr); /* we need non-lowercased
- * string */
+ /* Here we use non-lowercased string "recoded" */
+ s = recoded + (s - pstr);
while (*s && !t_isspace(s))
s += pg_mblen(s);
while (*s && t_isspace(s))