From: Heikki Linnakangas Date: Wed, 8 May 2013 17:57:42 +0000 (+0300) Subject: The data structure used in unaccent is a trie, not suffix tree. X-Git-Tag: REL9_3_BETA2~113 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4b06c1820a1b96769ea7447a0fc8e0edabbf57f5;p=postgresql The data structure used in unaccent is a trie, not suffix tree. Fix the term used in variable and struct names, and comments. Alexander Korotkov --- diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c index 4182376030..5c122c1832 100644 --- a/contrib/unaccent/unaccent.c +++ b/contrib/unaccent/unaccent.c @@ -23,30 +23,29 @@ PG_MODULE_MAGIC; /* - * Unaccent dictionary uses uncompressed suffix tree to find a - * character to replace. Each node of tree is an array of - * SuffixChar struct with length = 256 (n-th element of array + * Unaccent dictionary uses a trie to find a character to replace. Each node of + * the trie is an array of 256 TrieChar structs (n-th element of array * corresponds to byte) */ -typedef struct SuffixChar +typedef struct TrieChar { - struct SuffixChar *nextChar; + struct TrieChar *nextChar; char *replaceTo; int replacelen; -} SuffixChar; +} TrieChar; /* - * placeChar - put str into tree's structure, byte by byte. + * placeChar - put str into trie's structure, byte by byte. */ -static SuffixChar * -placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen) +static TrieChar * +placeChar(TrieChar *node, unsigned char *str, int lenstr, char *replaceTo, int replacelen) { - SuffixChar *curnode; + TrieChar *curnode; if (!node) { - node = palloc(sizeof(SuffixChar) * 256); - memset(node, 0, sizeof(SuffixChar) * 256); + node = palloc(sizeof(TrieChar) * 256); + memset(node, 0, sizeof(TrieChar) * 256); } curnode = node + *str; @@ -71,13 +70,14 @@ placeChar(SuffixChar *node, unsigned char *str, int lenstr, char *replaceTo, int } /* - * initSuffixTree - create suffix tree from file. Function converts - * UTF8-encoded file into current encoding. + * initTrie - create trie from file. + * + * Function converts UTF8-encoded file into current encoding. */ -static SuffixChar * -initSuffixTree(char *filename) +static TrieChar * +initTrie(char *filename) { - SuffixChar *volatile rootSuffixTree = NULL; + TrieChar *volatile rootTrie = NULL; MemoryContext ccxt = CurrentMemoryContext; tsearch_readline_state trst; volatile bool skip; @@ -161,7 +161,7 @@ initSuffixTree(char *filename) } if (state >= 3) - rootSuffixTree = placeChar(rootSuffixTree, + rootTrie = placeChar(rootTrie, (unsigned char *) src, srclen, trg, trglen); @@ -192,14 +192,14 @@ initSuffixTree(char *filename) tsearch_readline_end(&trst); - return rootSuffixTree; + return rootTrie; } /* - * findReplaceTo - find multibyte character in tree + * findReplaceTo - find multibyte character in trie */ -static SuffixChar * -findReplaceTo(SuffixChar *node, unsigned char *src, int srclen) +static TrieChar * +findReplaceTo(TrieChar *node, unsigned char *src, int srclen) { while (node) { @@ -221,7 +221,7 @@ Datum unaccent_init(PG_FUNCTION_ARGS) { List *dictoptions = (List *) PG_GETARG_POINTER(0); - SuffixChar *rootSuffixTree = NULL; + TrieChar *rootTrie = NULL; bool fileloaded = false; ListCell *l; @@ -235,7 +235,7 @@ unaccent_init(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("multiple Rules parameters"))); - rootSuffixTree = initSuffixTree(defGetString(defel)); + rootTrie = initTrie(defGetString(defel)); fileloaded = true; } else @@ -254,7 +254,7 @@ unaccent_init(PG_FUNCTION_ARGS) errmsg("missing Rules parameter"))); } - PG_RETURN_POINTER(rootSuffixTree); + PG_RETURN_POINTER(rootTrie); } PG_FUNCTION_INFO_V1(unaccent_lexize); @@ -262,21 +262,21 @@ Datum unaccent_lexize(PG_FUNCTION_ARGS); Datum unaccent_lexize(PG_FUNCTION_ARGS) { - SuffixChar *rootSuffixTree = (SuffixChar *) PG_GETARG_POINTER(0); + TrieChar *rootTrie = (TrieChar *) PG_GETARG_POINTER(0); char *srcchar = (char *) PG_GETARG_POINTER(1); int32 len = PG_GETARG_INT32(2); char *srcstart, *trgchar = NULL; int charlen; TSLexeme *res = NULL; - SuffixChar *node; + TrieChar *node; srcstart = srcchar; while (srcchar - srcstart < len) { charlen = pg_mblen(srcchar); - node = findReplaceTo(rootSuffixTree, (unsigned char *) srcchar, charlen); + node = findReplaceTo(rootTrie, (unsigned char *) srcchar, charlen); if (node && node->replaceTo) { if (!res)