PG_FUNCTION_INFO_V1(get_covers);
Datum get_covers(PG_FUNCTION_ARGS);
-static float weights[] = {0.1, 0.2, 0.4, 1.0};
+static float weights[] = {0.1f, 0.2f, 0.4f, 1.0f}; /* built-in per-class weights; calc_rank_cd falls back to these for negative caller entries */
#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] ) /* looks up WEP_GETWEIGHT(wep) in an array named `w` that must be in scope at the use site */
-#define RANK_NO_NORM 0x00
-#define RANK_NORM_LOGLENGTH 0x01
-#define RANK_NORM_LENGTH 0x02
-#define RANK_NORM_EXTDIST 0x04
+#define RANK_NO_NORM 0x00 /* no normalization bit set */
+#define RANK_NORM_LOGLENGTH 0x01 /* divide the rank by a logarithm of (cnt_length() + 1) */
+#define RANK_NORM_LENGTH 0x02 /* divide the rank by cnt_length() when it is positive */
+#define RANK_NORM_EXTDIST 0x04 /* calc_rank_cd only: divide by NExtent / SumDist */
#define RANK_NORM_UNIQ 0x08 /* divide the rank by the tsvector's size field */
#define RANK_NORM_LOGUNIQ 0x10 /* divide the rank by log(size + 1) / log(2.0) */
-#define DEF_NORM_METHOD RANK_NO_NORM
+#define DEF_NORM_METHOD RANK_NO_NORM /* method used when the caller supplies no normalization argument */
static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
word_distance(int4 w)
{
if (w > 100)
- return 1e-30;
+ return (float4)1e-30;
return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
}
}
-static char *SortAndUniqOperand = NULL;
-
static int
-compareITEM(const void *a, const void *b)
+compareITEM(const void *a, const void *b, void *arg)
{
+ char *operand = (char *) arg;
+
if ((*(ITEM **) a)->length == (*(ITEM **) b)->length)
- return strncmp(SortAndUniqOperand + (*(ITEM **) a)->distance,
- SortAndUniqOperand + (*(ITEM **) b)->distance,
+ return strncmp(operand + (*(ITEM **) a)->distance,
+ operand + (*(ITEM **) b)->distance,
(*(ITEM **) b)->length);
return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 1 : -1;
if (*size < 2)
return res;
- SortAndUniqOperand = operand;
- qsort(res, *size, sizeof(ITEM **), compareITEM);
+ qsort_arg(res, *size, sizeof(ITEM **), compareITEM, (void *) operand);
ptr = res + 1;
prevptr = res;
while (ptr - res < *size)
{
- if (compareITEM((void *) ptr, (void *) prevptr) != 0)
+ if (compareITEM((void *) ptr, (void *) prevptr, (void *) operand) != 0)
{
prevptr++;
*prevptr = *ptr;
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
if (res < 0)
- res = 1e-20;
+ res = (float)1e-20;
- if ( (method & RANK_NORM_LOGLENGTH) && t->size>0 )
+ if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
res /= log((double) (cnt_length(t) + 1)) / log(2.0);
- if ( method & RANK_NORM_LENGTH ) {
+ if (method & RANK_NORM_LENGTH)
+ {
len = cnt_length(t);
- if ( len>0 )
+ if (len > 0)
res /= (float) len;
}
- if ( (method & RANK_NORM_UNIQ) && t->size > 0 )
- res /= (float)( t->size );
+ if ((method & RANK_NORM_UNIQ) && t->size > 0)
+ res /= (float) (t->size);
- if ( (method & RANK_NORM_LOGUNIQ) && t->size > 0 )
+ if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
res /= log((double) (t->size + 1)) / log(2.0);
return res;
}
}
-typedef struct {
- int pos;
- int p;
- int q;
- DocRepresentation *begin;
- DocRepresentation *end;
-} Extention;
+typedef struct
+{
+ int pos; /* read by Cover() on entry as its starting "lastpos" */
+ int p; /* set by Cover() from the pos of the entry stored in begin */
+ int q; /* other end of the cover; (q - p) is used as its position span -- assignment not visible here, TODO confirm */
+ DocRepresentation *begin; /* first entry of the current cover */
+ DocRepresentation *end; /* last entry of the current cover; callers iterate begin..end inclusive */
+} Extention;
static bool
-Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention *ext)
+Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention * ext)
{
DocRepresentation *ptr;
int lastpos = ext->pos;
ptr->item[i]->istrue = 1;
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM))
{
- if (ptr->pos < ext->p) {
+ if (ptr->pos < ext->p)
+ {
ext->begin = ptr;
ext->p = ptr->pos;
}
int len = query->size * 4,
cur = 0;
DocRepresentation *doc;
+ char *operand;
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
- SortAndUniqOperand = GETOPERAND(query);
+ operand = GETOPERAND(query);
reset_istrue_flag(query);
for (i = 0; i < query->size; i++)
for (k = 0; k < query->size; k++)
{
kptr = item + k;
- if (k == i || (item[k].type == VAL && compareITEM(&kptr, &iptr) == 0))
+ if (k == i ||
+ (item[k].type == VAL &&
+ compareITEM(&kptr, &iptr, operand) == 0))
{
doc[cur].item[doc[cur].nitem] = item + k;
doc[cur].nitem++;
}
/*
 * calc_rank_cd: compute a cover-based rank of txt against query.
 *
 * arrdata holds one weight per entry of weights[]; a negative entry
 * selects the corresponding built-in weight, and a value above 1.0
 * raises "weight out of range".  Each cover reported by Cover()
 * contributes Cpos / (1 + nNoise) to the running total, which is then
 * divided according to the RANK_NORM_* bits set in method.
 *
 * Returns 0.0 when get_docrep() yields no document representation.
 */
static float4
-calc_rank_cd(float4 *arrdata, tsvector *txt, QUERYTYPE *query, int method) {
+calc_rank_cd(float4 *arrdata, tsvector * txt, QUERYTYPE * query, int method)
+{
DocRepresentation *doc;
- int len,
+ int len,
i,
doclen = 0;
Extention ext;
double Wdoc = 0.0;
double invws[lengthof(weights)]; /* reciprocal of each class weight, filled in below */
- double SumDist=0.0, PrevExtPos=0.0, CurExtPos=0.0;
- int NExtent=0;
+ double SumDist = 0.0,
+ PrevExtPos = 0.0,
+ CurExtPos = 0.0;
+ int NExtent = 0;
for (i = 0; i < lengthof(weights); i++)
{
- invws[i] = ((double)((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
+ invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i])); /* negative entry => built-in weight */
if (invws[i] > 1.0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("weight out of range")));
- invws[i] = 1.0/invws[i];
+ invws[i] = 1.0 / invws[i]; /* keep the reciprocal; these are summed per cover */
}
doc = get_docrep(txt, query, &doclen);
- if (!doc)
+ if (!doc)
return 0.0;
- MemSet( &ext, 0, sizeof(Extention) );
- while (Cover(doc, doclen, query, &ext)) {
- double Cpos = 0.0;
- double InvSum = 0.0;
+ MemSet(&ext, 0, sizeof(Extention));
+ while (Cover(doc, doclen, query, &ext))
+ {
+ double Cpos = 0.0;
+ double InvSum = 0.0;
+ int nNoise;
DocRepresentation *ptr = ext.begin;
- while ( ptr<=ext.end ) {
- InvSum += invws[ ptr->wclass ];
+ while (ptr <= ext.end)
+ {
+ InvSum += invws[ptr->wclass];
ptr++;
}
- Cpos = ((double)( ext.end-ext.begin+1 )) / InvSum;
- Wdoc += Cpos / ( (double)(( 1 + (ext.q - ext.p) - (ext.end - ext.begin) )) );
+ Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum; /* entry count over the sum of reciprocal weights */
+ /*
+ * If the document is big enough then ext.q may be equal to
+ * ext.p due to the limit of positional information. In this
+ * case we approximate the number of noise words as half the
+ * cover's length.
+ */
+ nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
+ if ( nNoise < 0 )
+ nNoise = (ext.end - ext.begin) / 2;
+ Wdoc += Cpos / ((double) (1 + nNoise));
- CurExtPos = ((double)(ext.q + ext.p))/2.0;
- if ( NExtent>0 && CurExtPos > PrevExtPos /* prevent devision by zero in a case of multiple lexize */ )
- SumDist += 1.0/( CurExtPos - PrevExtPos );
+ CurExtPos = ((double) (ext.q + ext.p)) / 2.0; /* midpoint of the cover's position span */
+ if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by
+ * zero in a case of
+ * multiple lexize */ )
+ SumDist += 1.0 / (CurExtPos - PrevExtPos);
PrevExtPos = CurExtPos;
- NExtent++;
+ NExtent++;
}
- if ( (method & RANK_NORM_LOGLENGTH) && txt->size > 0 )
+ if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
Wdoc /= log((double) (cnt_length(txt) + 1)); /* natural log; unlike the LOGUNIQ case below there is no log(2.0) divisor */
- if ( method & RANK_NORM_LENGTH ) {
+ if (method & RANK_NORM_LENGTH)
+ {
len = cnt_length(txt);
- if ( len>0 )
+ if (len > 0)
Wdoc /= (double) len;
}
- if ( (method & RANK_NORM_EXTDIST) && SumDist > 0 )
- Wdoc /= ((double)NExtent) / SumDist;
+ if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
+ Wdoc /= ((double) NExtent) / SumDist; /* cover count over the summed inverse distances between cover midpoints */
- if ( (method & RANK_NORM_UNIQ) && txt->size > 0 )
- Wdoc /= (double)( txt->size );
+ if ((method & RANK_NORM_UNIQ) && txt->size > 0)
+ Wdoc /= (double) (txt->size);
- if ( (method & RANK_NORM_LOGUNIQ) && txt->size > 0 )
+ if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
Wdoc /= log((double) (txt->size + 1)) / log(2.0);
for (i = 0; i < doclen; i++)
pfree(doc[i].item);
pfree(doc);
- return (float4)Wdoc;
-}
+ return (float4) Wdoc;
+}
Datum
rank_cd(PG_FUNCTION_ARGS)
{
- ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+ ArrayType *win;
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
int method = DEF_NORM_METHOD;
float4 res;
+ /*
+ * Pre-8.2, rank_cd took just a plain int as its first argument.
+ * It was a mistake to keep the same C function name while changing the
+ * signature, but it's too late to fix that. Instead, do a runtime test
+ * to make sure the expected datatype has been passed. This is needed
+ * to prevent core dumps if tsearch2 function definitions from an old
+ * database are loaded into an 8.2 server.
+ */
+ if (get_fn_expr_argtype(fcinfo->flinfo, 0) != FLOAT4ARRAYOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+ errmsg("rank_cd() now takes real[] as its first argument, not integer")));
+
+ /* now safe to dereference the first arg */
+ win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+
if (ARR_NDIM(win) != 1)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
if (PG_NARGS() == 4)
method = PG_GETARG_INT32(3);
- res = calc_rank_cd( (float4 *) ARR_DATA_PTR(win), txt, query, method);
+ res = calc_rank_cd((float4 *) ARR_DATA_PTR(win), txt, query, method);
PG_FREE_IF_COPY(win, 0);
PG_FREE_IF_COPY(txt, 1);
{
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
- float4 res;
+ float4 res;
- res = calc_rank_cd( weights, txt, query, (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : DEF_NORM_METHOD);
-
- PG_FREE_IF_COPY(txt, 1);
- PG_FREE_IF_COPY(query, 2);
+ res = calc_rank_cd(weights, txt, query, (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : DEF_NORM_METHOD);
+
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(query, 1);
PG_RETURN_FLOAT4(res);
}
text *out;
char *cptr;
DocRepresentation *doc;
- int olddwpos = 0;
+ int olddwpos = 0;
int ncover = 1;
Extention ext;
}
qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
- MemSet( &ext, 0, sizeof(Extention) );
+ MemSet(&ext, 0, sizeof(Extention));
while (Cover(doc, rlen, query, &ext))
{
dwptr = dw + olddwpos;