]> granicus.if.org Git - postgresql/commit
improve support of agglutinative languages (query with compound words).
authorTeodor Sigaev <teodor@sigaev.ru>
Tue, 25 Jan 2005 15:24:38 +0000 (15:24 +0000)
committerTeodor Sigaev <teodor@sigaev.ru>
Tue, 25 Jan 2005 15:24:38 +0000 (15:24 +0000)
commit324300bc7ccba6988f16915468ee2b870ef3ae5f
tree0922e10a1c417c5bff0100730281ad22add28860
parentd314616d128ba692aec434bd376bc40886f98f7b
improve support of agglutinative languages (query with compound words).

regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

So, the interface to dictionaries has changed: the lexize method of a dictionary
should return a pointer to an array of TSLexeme structs instead of char**. The
last element should have TSLexeme->lexeme == NULL.

/*
 * One lexeme produced by a dictionary's lexize method.
 *
 * lexize returns a pointer to an array of these structs; the last element
 * of the array is marked by lexeme == NULL.
 */
typedef struct {
        /*
         * Number of the split variant this lexeme belongs to. For example,
         * the Norwegian word 'fotballklubber' can be split in two ways:
         * ( fotball, klubb ) and ( fot, ball, klubb ). So the dictionary
         * should return:
         *
         *      nvariant        lexeme
         *      1               fotball
         *      1               klubb
         *      2               fot
         *      2               ball
         *      2               klubb
         */
        uint16  nvariant;

        /* currently unused */
        uint16  flags;

        /* the lexeme itself, as a C string; NULL in the terminating array element */
        char    *lexeme;
} TSLexeme;
12 files changed:
contrib/tsearch2/dict.c
contrib/tsearch2/dict.h
contrib/tsearch2/dict_ex.c
contrib/tsearch2/dict_ispell.c
contrib/tsearch2/dict_snowball.c
contrib/tsearch2/dict_syn.c
contrib/tsearch2/gendict/dict_tmpl.c.IN
contrib/tsearch2/ispell/spell.c
contrib/tsearch2/ispell/spell.h
contrib/tsearch2/query.c
contrib/tsearch2/ts_cfg.c
contrib/tsearch2/ts_cfg.h