{
text *in = PG_GETARG_TEXT_P(1);
DictInfo *dict;
- char **res,
- **ptr;
+ TSLexeme *res,
+ *ptr;
Datum *da;
ArrayType *a;
SET_FUNCOID();
dict = finddict(PG_GETARG_OID(0));
- ptr = res = (char **) DatumGetPointer(
+ ptr = res = (TSLexeme *) DatumGetPointer(
FunctionCall3(&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(VARDATA(in)),
PG_RETURN_NULL();
}
- while (*ptr)
+ while (ptr->lexeme)
ptr++;
da = (Datum *) palloc(sizeof(Datum) * (ptr - res + 1));
ptr = res;
- while (*ptr)
+ while (ptr->lexeme)
{
- da[ptr - res] = PointerGetDatum(char2text(*ptr));
+ da[ptr - res] = PointerGetDatum(char2text(ptr->lexeme));
ptr++;
}
);
ptr = res;
- while (*ptr)
+ while (ptr->lexeme)
{
pfree(DatumGetPointer(da[ptr - res]));
- pfree(*ptr);
+ pfree(ptr->lexeme);
ptr++;
}
pfree(res);
void parse_cfgdict(text *in, Map ** m);
+/*
+ * Return struct for any lexize function.  A lexize function hands back an
+ * array of these; consumers walk the array until they reach an entry whose
+ * lexeme is NULL.
+ */
+typedef struct {
+ /* Number of the split variant this lexeme belongs to.  For example,
+ the Norwegian word 'fotballklubber' can be split in two ways:
+ ( fotball, klubb ) and ( fot, ball, klubb ).  So, the dictionary
+ should return:
+ nvariant lexeme
+ 1 fotball
+ 1 klubb
+ 2 fot
+ 2 ball
+ 2 klubb
+
+ */
+ uint16 nvariant;
+
+ /* currently unused */
+ uint16 flags;
+
+ /* C-string; NULL in the entry that terminates the array */
+ char *lexeme;
+} TSLexeme;
+
#endif
DictExample *d = (DictExample *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
- char **res = palloc(sizeof(char *) * 2);
+ TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
+
+ memset(res,0,sizeof(TSLexeme) * 2);
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
pfree(txt);
- res[0] = NULL;
}
else
- res[0] = txt;
- res[1] = NULL;
+ res[0].lexeme = txt;
PG_RETURN_POINTER(res);
}
DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
char *txt;
- char **res;
- char **ptr,
- **cptr;
+ TSLexeme *res;
+ TSLexeme *ptr,
+ *cptr;
if (!PG_GETARG_INT32(2))
PG_RETURN_POINTER(NULL);
- res = palloc(sizeof(char *) * 2);
txt = pnstrdup(in, PG_GETARG_INT32(2));
res = NINormalizeWord(&(d->obj), txt);
pfree(txt);
PG_RETURN_POINTER(NULL);
ptr = cptr = res;
- while (*ptr)
+ while (ptr->lexeme)
{
- if (searchstoplist(&(d->stoplist), *ptr))
+ if (searchstoplist(&(d->stoplist), ptr->lexeme))
{
- pfree(*ptr);
- *ptr = NULL;
+ pfree(ptr->lexeme);
+ ptr->lexeme = NULL;
ptr++;
}
else
{
- *cptr = *ptr;
+ /* struct assignment: well-defined even while cptr == ptr (memcpy on the same object is not) */
+ *cptr = *ptr;
cptr++;
ptr++;
}
}
- *cptr = NULL;
+ cptr->lexeme = NULL;
PG_RETURN_POINTER(res);
}
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
- char **res = palloc(sizeof(char *) * 2);
+ TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
+ memset(res, 0, sizeof(TSLexeme) * 2);
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
{
pfree(txt);
- res[0] = NULL;
}
else
{
memcpy(txt, d->z->p, d->z->l);
txt[d->z->l] = '\0';
}
- res[0] = txt;
+ res->lexeme = txt;
}
- res[1] = NULL;
-
PG_RETURN_POINTER(res);
}
char *in = (char *) PG_GETARG_POINTER(1);
Syn key,
*found;
- char **res = NULL;
+ TSLexeme *res = NULL;
if (!PG_GETARG_INT32(2))
PG_RETURN_POINTER(NULL);
if (!found)
PG_RETURN_POINTER(NULL);
- res = palloc(sizeof(char *) * 2);
-
- res[0] = pstrdup(found->out);
- res[1] = NULL;
+ res = palloc(sizeof(TSLexeme) * 2);
+ memset(res,0,sizeof(TSLexeme) * 2);
+ res[0].lexeme = pstrdup(found->out);
PG_RETURN_POINTER(res);
}
HASINIT DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
char *in = (char*)PG_GETARG_POINTER(1);
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
- char **res=palloc(sizeof(char*)*2);
+ /* two TSLexeme entries (result + terminator): size by the struct, not by a pointer to it */
+ TSLexeme *res=palloc(sizeof(TSLexeme)*2);
+ memset(res,0,sizeof(TSLexeme)*2);
- /* Your INIT dictionary code */
+ /* Your LEXIZE dictionary code */
HASINIT if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
HASINIT pfree(txt);
-HASINIT res[0]=NULL;
+HASINIT res[0].lexeme=NULL;
HASINIT } else
- res[0]=txt;
- res[1]=NULL;
+ res[0].lexeme=txt;
+ res[1].lexeme=NULL;
PG_RETURN_POINTER(res);
}
return var;
}
-char **
+TSLexeme *
NINormalizeWord(IspellDict * Conf, char *word)
{
char **res = NormalizeSubWord(Conf, word, 0);
+ TSLexeme *lcur=NULL, *lres=NULL;
+ /* same type as TSLexeme.nvariant; u_int16_t is not available on every platform */
+ uint16 NVariant=1;
+
+ /*
+  * Wrap the normal forms of the whole word into a TSLexeme array.
+  * Each form gets its own variant number, and the strings themselves
+  * are handed over to the new array (only the pointer array is freed).
+  */
+ if (res) {
+ char **ptr = res;
+ lcur = lres = (TSLexeme*)palloc( MAX_NORM * sizeof(TSLexeme) );
+ while(*ptr) {
+ lcur->lexeme=*ptr;
+ lcur->flags=0;
+ lcur->nvariant = NVariant++;
+ lcur++;
+ ptr++;
+ }
+ /* terminator entry: consumers stop at lexeme == NULL */
+ lcur->lexeme=NULL;
+ pfree(res);
+ }
if (Conf->compoundcontrol != '\t')
{
int wordlen = strlen(word);
SplitVar *ptr,
*var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
- char **cur = res;
int i;
while (var)
if (subres)
{
- char **ptr = subres;
+ char **subptr = subres;
+
+ if ( !lcur )
+ lcur = lres = (TSLexeme*)palloc( MAX_NORM * sizeof(TSLexeme) );
+
+ while(*subptr) {
+ for(i=0;i<var->nstem-1;i++) {
+ lcur->lexeme=(subptr==subres) ? var->stem[ i ] : pstrdup(var->stem[ i ]);
+ lcur->flags=0;
+ lcur->nvariant = NVariant;
+ lcur++;
+ }
- if (cur)
- {
- while (*cur)
- cur++;
- }
- else
- res = cur = (char **) palloc(MAX_NORM * sizeof(char *));
+ lcur->lexeme=*subptr;
+ lcur->flags=0;
+ lcur->nvariant = NVariant;
+ lcur++;
+ subptr++;
+ NVariant++;
+ }
- for (i = 0; i < var->nstem - 1; i++)
- {
- *cur = var->stem[i];
- cur++;
- }
- while (*ptr)
- {
- *cur = *ptr;
- cur++;
- ptr++;
- }
- *cur = NULL;
+ lcur->lexeme=NULL;
pfree(subres);
var->stem[0] = NULL;
+ pfree( var->stem[ var->nstem-1 ] );
}
}
var = ptr;
}
}
- return res;
+ return lres;
}
#include <sys/types.h>
#include "regex/regex.h"
-#include "regis.h"
#include "c.h"
-
+#include "regis.h"
+#include "dict.h"
+
struct SPNode;
} IspellDict;
-char **NINormalizeWord(IspellDict * Conf, char *word);
+TSLexeme *NINormalizeWord(IspellDict * Conf, char *word);
int NIImportAffixes(IspellDict * Conf, const char *filename);
int NIImportDictionary(IspellDict * Conf, const char *filename);
{
int4 count = 0;
PRSTEXT prs;
+ uint32 variant, pos, cntvar=0, cntpos=0, cnt=0;
prs.lenwords = 32;
prs.curwords = 0;
parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
- for (count = 0; count < prs.curwords; count++)
- {
- pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
- pfree(prs.words[count].word);
- if (count)
- pushquery(state, OPR, (int4) '&', 0, 0, 0);
- }
- pfree(prs.words);
+ if ( prs.curwords>0 ) {
+
+ while (count < prs.curwords) {
+ pos = prs.words[count].pos.pos;
+ cntvar=0;
+ while(count < prs.curwords && pos==prs.words[count].pos.pos) {
+ variant = prs.words[count].nvariant;
+
+ cnt=0;
+ while(count < prs.curwords && pos==prs.words[count].pos.pos && variant==prs.words[count].nvariant) {
+
+ pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+ pfree(prs.words[count].word);
+ if ( cnt )
+ pushquery(state, OPR, (int4) '&', 0, 0, 0);
+ cnt++;
+ count++;
+ }
+
+ if ( cntvar )
+ pushquery(state, OPR, (int4) '|', 0, 0, 0);
+ cntvar++;
+ }
+
+ if (cntpos)
+ pushquery(state, OPR, (int4) '&', 0, 0, 0);
+
+ cntpos++;
+ }
+
+ pfree(prs.words);
- /* XXX */
- if (prs.curwords == 0)
+ } else
pushval_asis(state, VALSTOP, NULL, 0, 0);
}
for (i = 0; i < cfg->map[type].len; i++)
{
DictInfo *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
- char **norms,
- **ptr;
+ TSLexeme *norms,
+ *ptr;
- norms = ptr = (char **) DatumGetPointer(
+ norms = ptr = (TSLexeme *) DatumGetPointer(
FunctionCall3(
&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
prs->pos++; /* set pos */
- while (*ptr)
+ while (ptr->lexeme)
{
if (prs->curwords == prs->lenwords)
{
prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD));
}
- prs->words[prs->curwords].len = strlen(*ptr);
- prs->words[prs->curwords].word = *ptr;
+ prs->words[prs->curwords].len = strlen(ptr->lexeme);
+ prs->words[prs->curwords].word = ptr->lexeme;
+ prs->words[prs->curwords].nvariant = ptr->nvariant;
prs->words[prs->curwords].alen = 0;
prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
ptr++;
for (i = 0; i < cfg->map[type].len; i++)
{
DictInfo *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i]));
- char **norms,
- **ptr;
+ TSLexeme *norms,
+ *ptr;
- norms = ptr = (char **) DatumGetPointer(
+ norms = ptr = (TSLexeme *) DatumGetPointer(
FunctionCall3(
&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
if (!norms) /* dictionary doesn't know this lexem */
continue;
- while (*ptr)
+ while (ptr->lexeme)
{
- hlfinditem(prs, query, *ptr, strlen(*ptr));
- pfree(*ptr);
+ hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
+ pfree(ptr->lexeme);
ptr++;
}
pfree(norms);
typedef struct
{
uint16 len;
+ uint16 nvariant;
union
{
uint16 pos;