(1 row)
insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+drop trigger tsvectorupdate on test_tsvector;
select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
word | ndoc | nentry
-----------+------+--------
qwerti | 1 | 1
(1146 rows)
+insert into test_tsvector values ('1', 'a:1a,2,3b b:5a,6a,7c,8');
+insert into test_tsvector values ('1', 'a:1a,2,3c b:5a,6b,7c,8b');
+select * from stat('select a from test_tsvector','a') order by ndoc desc, nentry desc, word;
+ word | ndoc | nentry
+------+------+--------
+ b | 2 | 3
+ a | 2 | 2
+(2 rows)
+
+select * from stat('select a from test_tsvector','b') order by ndoc desc, nentry desc, word;
+ word | ndoc | nentry
+------+------+--------
+ b | 1 | 2
+ a | 1 | 1
+(2 rows)
+
+select * from stat('select a from test_tsvector','c') order by ndoc desc, nentry desc, word;
+ word | ndoc | nentry
+------+------+--------
+ b | 2 | 2
+ a | 1 | 1
+(2 rows)
+
+select * from stat('select a from test_tsvector','d') order by ndoc desc, nentry desc, word;
+ word | ndoc | nentry
+-----------+------+--------
+ a | 2 | 2
+ copyright | 2 | 2
+ foo | 1 | 3
+ bar | 1 | 2
+ 345 | 1 | 1
+ b | 1 | 1
+ qq | 1 | 1
+ qwerti | 1 | 1
+(8 rows)
+
+select * from stat('select a from test_tsvector','ad') order by ndoc desc, nentry desc, word;
+ word | ndoc | nentry
+-----------+------+--------
+ a | 2 | 4
+ b | 2 | 4
+ copyright | 2 | 2
+ foo | 1 | 3
+ bar | 1 | 2
+ 345 | 1 | 1
+ qq | 1 | 1
+ qwerti | 1 | 1
+(8 rows)
+
select reset_tsearch();
NOTICE: TSearch cache cleaned
reset_tsearch
(5 rows)
--check ordering
-drop trigger tsvectorupdate on test_tsvector;
insert into test_tsvector values (null, null);
select a is null, a from test_tsvector order by a;
?column? | a
f |
f | '345':1 'qwerti':2 'copyright':3
f | 'qq':7 'bar':2,8 'foo':1,3,6 'copyright':9
+ f | 'a':1A,2,3C 'b':5A,6B,7C,8B
+ f | 'a':1A,2,3B 'b':5A,6A,7C,8
f | '7w' 'ch' 'd7' 'eo' 'gw' 'i4' 'lq' 'o6' 'qt' 'y0'
f | 'ar' 'ei' 'kq' 'ma' 'qa' 'qh' 'qq' 'qz' 'rx' 'st'
f | 'gs' 'i6' 'i9' 'j2' 'l0' 'oq' 'qx' 'sc' 'xe' 'yu'
f | '1b' '42' 'a7' 'ab' 'ak' 'ap' 'at' 'av' 'ay' 'b0' 'b9' 'bb' 'bp' 'bu' 'bz' 'cq' 'da' 'de' 'dn' 'e0' 'eb' 'ef' 'eg' 'ek' 'eq' 'er' 'eu' 'ey' 'fn' 'ft' 'gg' 'h4' 'hk' 'hl' 'i7' 'ig' 'ik' 'ip' 'ir' 'iu' 'iw' 'jr' 'jw' 'jx' 'kg' 'lc' 'lg' 'm0' 'na' 'np' 'om' 'on' 'oz' 'pg' 'pn' 'ps' 'pt' 'pz' 'q3' 'q6' 'qa' 'qb' 'ql' 'qq' 'qt' 'qv' 'qw' 'qy' 'r8' 'rf' 'ri' 'rk' 'rl' 'rw' 'sg' 'si' 'sp' 'sw' 'ta' 'th' 'ua' 'uj' 'uu' 'uv' 'uz' 'vj' 'vk' 'vm' 'wc' 'wf' 'wh' 'wn' 'wo' 'ww' 'xb' 'xk' 'xt' 'xw' 'y7' 'ye' 'yl' 'yt' 'yw' 'z4' 'z7' 'zc' 'zw'
f | '1h' '3s' 'ab' 'ae' 'ax' 'b1' 'bz' 'cy' 'dk' 'dq' 'ds' 'du' 'e8' 'ef' 'ej' 'ek' 'ex' 'f1' 'fe' 'ff' 'fn' 'fo' 'ft' 'fx' 'ge' 'go' 'gz' 'h6' 'hz' 'i2' 'iv' 'iy' 'j5' 'j6' 'ke' 'kf' 'lh' 'lr' 'mc' 'mj' 'na' 'ng' 'oh' 'om' 'oy' 'p2' 'pi' 'pk' 'py' 'q3' 'qb' 'qc' 'qg' 'qn' 'qo' 'qq' 'qu' 'qw' 'qx' 'qy' 'qz' 'r1' 'rk' 'rl' 'rq' 'rs' 'rt' 'ry' 'rz' 'sk' 'sl' 'so' 't9' 'td' 'te' 'tn' 'tw' 'tz' 'ud' 'uk' 'uo' 'uq' 'uw' 'ux' 'uy' 'v1' 'vg' 'vq' 'w4' 'w9' 'wa' 'wg' 'wj' 'wm' 'wo' 'wr' 'ww' 'wy' 'xf' 'xg' 'y9' 'yh' 'yi' 'yk' 'ym' 'yq' 'yv' 'zm'
t |
-(512 rows)
+(514 rows)
select rank(' a:1 s:2 d g'::tsvector, 'a & s');
insert into test_tsvector (t) values ('foo bar foo the over foo qq bar');
+drop trigger tsvectorupdate on test_tsvector;
select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word;
+insert into test_tsvector values ('1', 'a:1a,2,3b b:5a,6a,7c,8');
+insert into test_tsvector values ('1', 'a:1a,2,3c b:5a,6b,7c,8b');
+select * from stat('select a from test_tsvector','a') order by ndoc desc, nentry desc, word;
+select * from stat('select a from test_tsvector','b') order by ndoc desc, nentry desc, word;
+select * from stat('select a from test_tsvector','c') order by ndoc desc, nentry desc, word;
+select * from stat('select a from test_tsvector','d') order by ndoc desc, nentry desc, word;
+select * from stat('select a from test_tsvector','ad') order by ndoc desc, nentry desc, word;
select reset_tsearch();
select to_tsquery('default', 'skies & books');
select * from ts_debug('Tsearch module for PostgreSQL 7.3.3');
--check ordering
-drop trigger tsvectorupdate on test_tsvector;
insert into test_tsvector values (null, null);
select a is null, a from test_tsvector order by a;
tsstat_in(PG_FUNCTION_ARGS)
{
tsstat *stat = palloc(STATHDRSIZE);
-
+
stat->len = STATHDRSIZE;
stat->size = 0;
+ stat->weight = 0;
PG_RETURN_POINTER(stat);
}
PG_RETURN_NULL();
}
+/*
+ * Count the positions of one lexeme whose weight is selected by a bit mask.
+ *
+ * txt    - tsvector that owns the entry
+ * wptr   - entry of txt whose position list is scanned
+ * weight - bit mask; bit N set selects positions whose weight field equals N
+ *
+ * Returns the number of positions of wptr that match the mask.
+ */
+static int
+check_weight(tsvector *txt, WordEntry *wptr, int8 weight) {
+ int len = POSDATALEN(txt, wptr);
+ int num=0;
+ WordEntryPos *ptr = POSDATAPTR(txt, wptr);
+
+ /* walk all len positions, testing each position's weight bit in the mask */
+ while (len--) {
+ if (weight & (1 << ptr->weight))
+ num++;
+ ptr++;
+ }
+ return num;
+}
+
static WordEntry **
SEI_realloc(WordEntry ** in, uint32 *len)
{
totallen = CALCSTATSIZE(nentry, slen);
newstat = palloc(totallen);
newstat->len = totallen;
+ newstat->weight = stat->weight;
newstat->size = nentry;
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
}
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
- nptr->nentry = POSDATALEN(txt, *ptr);
- if (nptr->nentry == 0)
+ if ( (*ptr)->haspos ) {
+ nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
+ } else
nptr->nentry = 1;
nptr->ndoc = 1;
nptr->len = (*ptr)->len;
}
else
{
- nptr->nentry = POSDATALEN(txt, *ptr);
- if (nptr->nentry == 0)
+ if ( (*ptr)->haspos ) {
+ nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
+ } else
nptr->nentry = 1;
nptr->ndoc = 1;
nptr->len = (*ptr)->len;
while (ptr - entry < len)
{
- nptr->nentry = POSDATALEN(txt, *ptr);
- if (nptr->nentry == 0)
+ if ( (*ptr)->haspos ) {
+ nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
+ } else
nptr->nentry = 1;
nptr->ndoc = 1;
nptr->len = (*ptr)->len;
cur = 0;
StatEntry *sptr;
WordEntry *wptr;
+ int n=0;
if (stat == NULL || PG_ARGISNULL(0))
{ /* Init in first */
stat = palloc(STATHDRSIZE);
stat->len = STATHDRSIZE;
stat->size = 0;
+ stat->weight = 0;
}
/* simple check of correctness */
sptr++;
else if (cmp == 0)
{
- int n = POSDATALEN(txt, wptr);
-
- if (n == 0)
- n = 1;
- sptr->ndoc++;
- sptr->nentry += n;
+ if ( stat->weight == 0 ) {
+ sptr->ndoc++;
+ sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
+ } else if ( wptr->haspos && (n=check_weight(txt, wptr, stat->weight))!=0 ) {
+ sptr->ndoc++;
+ sptr->nentry += n;
+ }
sptr++;
wptr++;
}
else
{
- if (cur == len)
- newentry = SEI_realloc(newentry, &len);
- newentry[cur] = wptr;
+ if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
+ if (cur == len)
+ newentry = SEI_realloc(newentry, &len);
+ newentry[cur] = wptr;
+ cur++;
+ }
wptr++;
- cur++;
}
}
while (wptr - ARRPTR(txt) < txt->size)
{
- if (cur == len)
- newentry = SEI_realloc(newentry, &len);
- newentry[cur] = wptr;
+ if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
+ if (cur == len)
+ newentry = SEI_realloc(newentry, &len);
+ newentry[cur] = wptr;
+ cur++;
+ }
wptr++;
- cur++;
}
}
else
cmp = compareStatWord(sptr, wptr, stat, txt);
if (cmp == 0)
{
- int n = POSDATALEN(txt, wptr);
-
- if (n == 0)
- n = 1;
- sptr->ndoc++;
- sptr->nentry += n;
+ if ( stat->weight == 0 ) {
+ sptr->ndoc++;
+ sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
+ } else if ( wptr->haspos && (n=check_weight(txt, wptr, stat->weight))!=0 ) {
+ sptr->ndoc++;
+ sptr->nentry += n;
+ }
break;
}
else if (cmp < 0)
if (StopLow >= StopHigh)
{ /* not found */
- if (cur == len)
- newentry = SEI_realloc(newentry, &len);
- newentry[cur] = wptr;
- cur++;
+ if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
+ if (cur == len)
+ newentry = SEI_realloc(newentry, &len);
+ newentry[cur] = wptr;
+ cur++;
+ }
}
wptr++;
}
}
static tsstat *
-ts_stat_sql(text *txt)
+ts_stat_sql(text *txt, text *ws)
{
char *query = text2char(txt);
int i;
stat = palloc(STATHDRSIZE);
stat->len = STATHDRSIZE;
stat->size = 0;
+ stat->weight = 0;
+
+	/*
+	 * Optional weight filter: every letter a-d (case-insensitive) found in
+	 * ws sets the corresponding bit in stat->weight ('a' -> bit 3 ...
+	 * 'd' -> bit 0).  Any other character is ignored.  A mask of 0 means
+	 * "no filtering".
+	 */
+	if ( ws ) {
+		char	   *buf;
+
+		buf = VARDATA(ws);
+		/*
+		 * The length must come from the varlena header of ws; buf points
+		 * into the payload and has no header of its own.
+		 */
+		while( buf - VARDATA(ws) < VARSIZE(ws) - VARHDRSZ ) {
+			/* cast: tolower() is undefined for negative char values */
+			switch (tolower((unsigned char) *buf)) {
+				case 'a':
+					stat->weight |= 1 << 3;
+					break;
+				case 'b':
+					stat->weight |= 1 << 2;
+					break;
+				case 'c':
+					stat->weight |= 1 << 1;
+					break;
+				case 'd':
+					stat->weight |= 1;
+					break;
+				default:
+					/* not a weight letter: leave the mask unchanged */
+					stat->weight |= 0;
+			}
+			buf++;
+		}
+	}
while (SPI_processed > 0)
{
{
tsstat *stat;
text *txt = PG_GETARG_TEXT_P(0);
+ text *ws = (PG_NARGS() > 1) ? PG_GETARG_TEXT_P(1) : NULL;
funcctx = SRF_FIRSTCALL_INIT();
SPI_connect();
- stat = ts_stat_sql(txt);
+ stat = ts_stat_sql(txt,ws);
PG_FREE_IF_COPY(txt, 0);
+ if (PG_NARGS() > 1 ) PG_FREE_IF_COPY(ws, 1);
ts_setup_firstcall(funcctx, stat);
SPI_finish();
}
{
int4 len;
int4 size;
+ int4 weight;
char data[1];
} tsstat;
-#define STATHDRSIZE (sizeof(int4)*2)
+#define STATHDRSIZE (sizeof(int4)*4)
#define CALCSTATSIZE(x, lenstr) ( x * sizeof(StatEntry) + STATHDRSIZE + lenstr )
#define STATPTR(x) ( (StatEntry*) ( (char*)x + STATHDRSIZE ) )
#define STATSTRPTR(x) ( (char*)x + STATHDRSIZE + ( sizeof(StatEntry) * ((tsvector*)x)->size ) )
language 'C'
with (isstrict);
+-- stat(query, weights): two-argument form backed by the C symbol ts_stat.
+-- The second argument is a string of weight letters (a, b, c, d); only
+-- positions carrying one of the listed weights are counted.
+CREATE FUNCTION stat(text,text)
+ returns setof statinfo
+ as 'MODULE_PATHNAME', 'ts_stat'
+ language 'C'
+ with (isstrict);
+
--reset - just for debuging
CREATE FUNCTION reset_tsearch()
returns void
DROP FUNCTION gtsvector_picksplit(internal, internal);
DROP FUNCTION gtsvector_union(internal, internal);
DROP FUNCTION reset_tsearch();
+-- drop both stat() overloads; argument types must match the CREATE FUNCTION signatures
+DROP FUNCTION stat(text);
+DROP FUNCTION stat(text,text);
DROP FUNCTION tsearch2() CASCADE;
DROP FUNCTION _get_parser_from_curcfg();