do
{
- char src[4096];
- char trg[4096];
- int srclen;
- int trglen;
- char *line = NULL;
-
+ /*
+ * pg_do_encoding_conversion() (called by tsearch_readline()) will
+ * emit an exception if it finds untranslatable characters in the
+ * current locale. We just skip such lines, continuing with the next.
+ */
skip = true;
PG_TRY();
{
- /*
- * pg_do_encoding_conversion() (called by tsearch_readline()) will
- * emit exception if it finds untranslatable characters in current
- * locale. We just skip such characters.
- */
+ char *line;
+
while ((line = tsearch_readline(&trst)) != NULL)
{
- if (sscanf(line, "%s\t%s\n", src, trg) != 2)
- continue;
+ /*
+ * The format of each line must be "src trg" where src and trg
+ * are sequences of one or more non-whitespace characters,
+ * separated by whitespace. Whitespace at start or end of
+ * line is ignored.
+ */
+ int state;
+ char *ptr;
+ char *src = NULL;
+ char *trg = NULL;
+ int ptrlen;
+ int srclen = 0;
+ int trglen = 0;
+
+ state = 0;
+ for (ptr = line; *ptr; ptr += ptrlen)
+ {
+ ptrlen = pg_mblen(ptr);
+ /* ignore whitespace, but end src or trg */
+ if (t_isspace(ptr))
+ {
+ if (state == 1)
+ state = 2;
+ else if (state == 3)
+ state = 4;
+ continue;
+ }
+ switch (state)
+ {
+ case 0:
+ /* start of src */
+ src = ptr;
+ srclen = ptrlen;
+ state = 1;
+ break;
+ case 1:
+ /* continue src */
+ srclen += ptrlen;
+ break;
+ case 2:
+ /* start of trg */
+ trg = ptr;
+ trglen = ptrlen;
+ state = 3;
+ break;
+ case 3:
+ /* continue trg */
+ trglen += ptrlen;
+ break;
+ default:
+ /* bogus line format */
+ state = -1;
+ break;
+ }
+ }
- srclen = strlen(src);
- trglen = strlen(trg);
+ if (state >= 3)
+ rootSuffixTree = placeChar(rootSuffixTree,
+ (unsigned char *) src, srclen,
+ trg, trglen);
- rootSuffixTree = placeChar(rootSuffixTree,
- (unsigned char *) src, srclen,
- trg, trglen);
- skip = false;
pfree(line);
}
+ skip = false;
}
PG_CATCH();
{