/*
- * lexize stream of lexemes
+ * lexize stream of lexemes
* Teodor Sigaev <teodor@sigaev.ru>
*/
#include "postgres.h"
#include "dict.h"
void
-LexizeInit(LexizeData *ld, TSCfgInfo *cfg) {
+LexizeInit(LexizeData * ld, TSCfgInfo * cfg)
+{
ld->cfg = cfg;
ld->curDictId = InvalidOid;
ld->posDict = 0;
ld->towork.head = ld->towork.tail = ld->curSub = NULL;
ld->waste.head = ld->waste.tail = NULL;
- ld->lastRes=NULL;
- ld->tmpRes=NULL;
+ ld->lastRes = NULL;
+ ld->tmpRes = NULL;
}
static void
-LPLAddTail(ListParsedLex *list, ParsedLex *newpl) {
- if ( list->tail ) {
+LPLAddTail(ListParsedLex * list, ParsedLex * newpl)
+{
+ if (list->tail)
+ {
list->tail->next = newpl;
list->tail = newpl;
- } else
+ }
+ else
list->head = list->tail = newpl;
newpl->next = NULL;
}
-static ParsedLex*
-LPLRemoveHead(ListParsedLex *list) {
- ParsedLex *res = list->head;
+static ParsedLex *
+LPLRemoveHead(ListParsedLex * list)
+{
+ ParsedLex *res = list->head;
- if ( list->head )
+ if (list->head)
list->head = list->head->next;
- if ( list->head == NULL )
+ if (list->head == NULL)
list->tail = NULL;
return res;
void
-LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm) {
- ParsedLex *newpl = (ParsedLex*)palloc( sizeof(ParsedLex) );
+LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm)
+{
+ ParsedLex *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
- newpl = (ParsedLex*)palloc( sizeof(ParsedLex) );
+ newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
newpl->type = type;
newpl->lemm = lemm;
newpl->lenlemm = lenlemm;
}
static void
-RemoveHead(LexizeData *ld) {
+RemoveHead(LexizeData * ld)
+{
LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork));
ld->posDict = 0;
}
static void
-setCorrLex(LexizeData *ld, ParsedLex **correspondLexem) {
- if ( correspondLexem ) {
+setCorrLex(LexizeData * ld, ParsedLex ** correspondLexem)
+{
+ if (correspondLexem)
+ {
*correspondLexem = ld->waste.head;
- } else {
- ParsedLex *tmp, *ptr = ld->waste.head;
+ }
+ else
+ {
+ ParsedLex *tmp,
+ *ptr = ld->waste.head;
- while(ptr) {
+ while (ptr)
+ {
tmp = ptr->next;
pfree(ptr);
ptr = tmp;
}
static void
-moveToWaste(LexizeData *ld, ParsedLex *stop) {
- bool go = true;
-
- while( ld->towork.head && go) {
- if (ld->towork.head == stop) {
+moveToWaste(LexizeData * ld, ParsedLex * stop)
+{
+ bool go = true;
+
+ while (ld->towork.head && go)
+ {
+ if (ld->towork.head == stop)
+ {
ld->curSub = stop->next;
go = false;
}
}
static void
-setNewTmpRes(LexizeData *ld, ParsedLex *lex, TSLexeme *res) {
- if ( ld->tmpRes ) {
- TSLexeme *ptr;
- for( ptr=ld->tmpRes; ptr->lexeme; ptr++ )
- pfree( ptr->lexeme );
- pfree( ld->tmpRes );
+setNewTmpRes(LexizeData * ld, ParsedLex * lex, TSLexeme * res)
+{
+ if (ld->tmpRes)
+ {
+ TSLexeme *ptr;
+
+ for (ptr = ld->tmpRes; ptr->lexeme; ptr++)
+ pfree(ptr->lexeme);
+ pfree(ld->tmpRes);
}
ld->tmpRes = res;
ld->lastRes = lex;
}
-TSLexeme*
-LexizeExec(LexizeData *ld, ParsedLex **correspondLexem) {
- int i;
- ListDictionary *map;
- DictInfo *dict;
- TSLexeme *res;
-
- if ( ld->curDictId == InvalidOid ) {
- /*
- * usial mode: dictionary wants only one word,
- * but we should keep in mind that we should go through
- * all stack
+TSLexeme *
+LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
+{
+ int i;
+ ListDictionary *map;
+ DictInfo *dict;
+ TSLexeme *res;
+
+ if (ld->curDictId == InvalidOid)
+ {
+ /*
+ * usual mode: the dictionary wants only one word, but we should keep in
+ * mind that we have to go through the whole stack
*/
- while( ld->towork.head ) {
- ParsedLex *curVal = ld->towork.head;
+ while (ld->towork.head)
+ {
+ ParsedLex *curVal = ld->towork.head;
map = ld->cfg->map + curVal->type;
- if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0 ) {
+ if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0)
+ {
/* skip this type of lexeme */
RemoveHead(ld);
continue;
}
- for (i = ld->posDict; i < map->len; i++) {
+ for (i = ld->posDict; i < map->len; i++)
+ {
dict = finddict(DatumGetObjectId(map->dict_id[i]));
ld->dictState.isend = ld->dictState.getnext = false;
ld->dictState.private = NULL;
- res = (TSLexeme *) DatumGetPointer( FunctionCall4(
- &(dict->lexize_info),
- PointerGetDatum(dict->dictionary),
- PointerGetDatum(curVal->lemm),
- Int32GetDatum(curVal->lenlemm),
- PointerGetDatum(&ld->dictState)
- ));
-
- if ( ld->dictState.getnext ) {
- /*
- * dictinary wants next word, so setup and store
- * current position and go to multiword mode
+ res = (TSLexeme *) DatumGetPointer(FunctionCall4(
+ &(dict->lexize_info),
+ PointerGetDatum(dict->dictionary),
+ PointerGetDatum(curVal->lemm),
+ Int32GetDatum(curVal->lenlemm),
+ PointerGetDatum(&ld->dictState)
+ ));
+
+ if (ld->dictState.getnext)
+ {
+ /*
+ * dictionary wants the next word, so set up and store the
+ * current position and go to multiword mode
*/
-
+
ld->curDictId = DatumGetObjectId(map->dict_id[i]);
- ld->posDict = i+1;
+ ld->posDict = i + 1;
ld->curSub = curVal->next;
- if ( res )
+ if (res)
setNewTmpRes(ld, curVal, res);
return LexizeExec(ld, correspondLexem);
}
- if (!res) /* dictionary doesn't know this lexeme */
+ if (!res) /* dictionary doesn't know this lexeme */
continue;
-
+
RemoveHead(ld);
setCorrLex(ld, correspondLexem);
return res;
}
RemoveHead(ld);
- }
- } else { /* curDictId is valid */
+ }
+ }
+ else
+ { /* curDictId is valid */
dict = finddict(ld->curDictId);
-
+
/*
* Dictionary ld->curDictId asks us about following words
*/
- while( ld->curSub ) {
- ParsedLex *curVal = ld->curSub;
+ while (ld->curSub)
+ {
+ ParsedLex *curVal = ld->curSub;
map = ld->cfg->map + curVal->type;
- if (curVal->type != 0) {
- bool dictExists = false;
+ if (curVal->type != 0)
+ {
+ bool dictExists = false;
- if (curVal->type >= ld->cfg->len || map->len == 0 ) {
+ if (curVal->type >= ld->cfg->len || map->len == 0)
+ {
/* skip this type of lexeme */
ld->curSub = curVal->next;
continue;
}
/*
- * We should be sure that current type of lexeme is recognized by
- * our dictinonary: we just check is it exist in
- * list of dictionaries ?
+ * We should be sure that the current type of lexeme is recognized
+ * by our dictionary: we just check whether it exists in the list
+ * of dictionaries.
*/
- for(i=0;i < map->len && !dictExists; i++)
- if ( ld->curDictId == DatumGetObjectId(map->dict_id[i]) )
+ for (i = 0; i < map->len && !dictExists; i++)
+ if (ld->curDictId == DatumGetObjectId(map->dict_id[i]))
dictExists = true;
- if ( !dictExists ) {
+ if (!dictExists)
+ {
/*
* Dictionary can't work with current tpe of lexeme,
* return to basic mode and redo all stored lexemes
ld->curDictId = InvalidOid;
return LexizeExec(ld, correspondLexem);
}
- }
-
- ld->dictState.isend = (curVal->type==0) ? true : false;
+ }
+
+ ld->dictState.isend = (curVal->type == 0) ? true : false;
ld->dictState.getnext = false;
- res = (TSLexeme *) DatumGetPointer( FunctionCall4(
- &(dict->lexize_info),
- PointerGetDatum(dict->dictionary),
- PointerGetDatum(curVal->lemm),
- Int32GetDatum(curVal->lenlemm),
- PointerGetDatum(&ld->dictState)
- ));
+ res = (TSLexeme *) DatumGetPointer(FunctionCall4(
+ &(dict->lexize_info),
+ PointerGetDatum(dict->dictionary),
+ PointerGetDatum(curVal->lemm),
+ Int32GetDatum(curVal->lenlemm),
+ PointerGetDatum(&ld->dictState)
+ ));
- if ( ld->dictState.getnext ) {
+ if (ld->dictState.getnext)
+ {
/* Dictionary wants one more */
ld->curSub = curVal->next;
- if ( res )
+ if (res)
setNewTmpRes(ld, curVal, res);
continue;
}
- if ( res || ld->tmpRes ) {
+ if (res || ld->tmpRes)
+ {
/*
- * Dictionary normalizes lexemes,
- * so we remove from stack all used lexemes ,
- * return to basic mode and redo end of stack (if it exists)
+ * Dictionary normalizes lexemes, so we remove all used lexemes
+ * from the stack, return to basic mode and redo the end of the
+ * stack (if it exists)
*/
- if ( res ) {
- moveToWaste( ld, ld->curSub );
- } else {
+ if (res)
+ {
+ moveToWaste(ld, ld->curSub);
+ }
+ else
+ {
res = ld->tmpRes;
- moveToWaste( ld, ld->lastRes );
+ moveToWaste(ld, ld->lastRes);
}
/* reset to initial state */
return res;
}
- /* Dict don't want next lexem and didn't recognize anything,
- redo from ld->towork.head */
+ /*
+ * Dictionary doesn't want the next lexeme and didn't recognize
+ * anything; redo from ld->towork.head
+ */
ld->curDictId = InvalidOid;
return LexizeExec(ld, correspondLexem);
- }
+ }
}
setCorrLex(ld, correspondLexem);
return NULL;
}
-