quark
(1 row)
+select * from test2 where t like '%z foo bar%';
+ t
+-------------
+ z foo bar
+(1 row)
+
+select * from test2 where t like ' z foo%';
+ t
+-------------
+ z foo bar
+(1 row)
+
+explain (costs off)
+ select * from test2 where t ~ '[abc]{3}';
+ QUERY PLAN
+------------------------------------------
+ Index Scan using test2_idx_gist on test2
+ Index Cond: (t ~ '[abc]{3}'::text)
+(2 rows)
+
+explain (costs off)
+ select * from test2 where t ~* 'DEF';
+ QUERY PLAN
+------------------------------------------
+ Index Scan using test2_idx_gist on test2
+ Index Cond: (t ~* 'DEF'::text)
+(2 rows)
+
+select * from test2 where t ~ '[abc]{3}';
+ t
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~ 'a[bc]+d';
+ t
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~ '(abc)*$';
+ t
+-------------
+ abcdef
+ quark
+ z foo bar
+(3 rows)
+
+select * from test2 where t ~* 'DEF';
+ t
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~ 'dEf';
+ t
+---
+(0 rows)
+
+select * from test2 where t ~* '^q';
+ t
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~* '[abc]{3}[def]{3}';
+ t
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~* 'ab[a-z]{3}';
+ t
+--------
+ abcdef
+(1 row)
+
+select * from test2 where t ~* '(^| )qua';
+ t
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~ 'q.*rk$';
+ t
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~ 'q';
+ t
+-------
+ quark
+(1 row)
+
+select * from test2 where t ~ '[a-z]{3}';
+ t
+-------------
+ abcdef
+ quark
+ z foo bar
+(3 rows)
+
+select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
+ t
+---
+(0 rows)
+
+select * from test2 where t ~ 'z foo bar';
+ t
+-------------
+ z foo bar
+(1 row)
+
+select * from test2 where t ~ ' z foo bar';
+ t
+-------------
+ z foo bar
+(1 row)
+
+select * from test2 where t ~ ' z foo bar';
+ t
+-------------
+ z foo bar
+(1 row)
+
+select * from test2 where t ~ ' z foo';
+ t
+-------------
+ z foo bar
+(1 row)
+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.1'" to load this file. \quit
+ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
+ OPERATOR 5 pg_catalog.~ (text, text),
+ OPERATOR 6 pg_catalog.~* (text, text);
+
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
OPERATOR 5 pg_catalog.~ (text, text),
OPERATOR 6 pg_catalog.~* (text, text);
OPERATOR 4 pg_catalog.~~* (text, text),
FUNCTION 8 (text, text) gtrgm_distance (internal, text, int, oid);
+-- Add operators that are new in 9.3.
+
+ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
+ OPERATOR 5 pg_catalog.~ (text, text),
+ OPERATOR 6 pg_catalog.~* (text, text);
+
-- support functions for gin
CREATE FUNCTION gin_extract_value_trgm(text, internal)
RETURNS internal
select * from test2 where t like E'%\\bcd%';
select * from test2 where t ilike '%BCD%';
select * from test2 where t ilike 'qua%';
+select * from test2 where t like '%z foo bar%';
+select * from test2 where t like ' z foo%';
+explain (costs off)
+ select * from test2 where t ~ '[abc]{3}';
+explain (costs off)
+ select * from test2 where t ~* 'DEF';
+select * from test2 where t ~ '[abc]{3}';
+select * from test2 where t ~ 'a[bc]+d';
+select * from test2 where t ~ '(abc)*$';
+select * from test2 where t ~* 'DEF';
+select * from test2 where t ~ 'dEf';
+select * from test2 where t ~* '^q';
+select * from test2 where t ~* '[abc]{3}[def]{3}';
+select * from test2 where t ~* 'ab[a-z]{3}';
+select * from test2 where t ~* '(^| )qua';
+select * from test2 where t ~ 'q.*rk$';
+select * from test2 where t ~ 'q';
+select * from test2 where t ~ '[a-z]{3}';
+select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
+select * from test2 where t ~ 'z foo bar';
+select * from test2 where t ~ ' z foo bar';
+select * from test2 where t ~ ' z foo bar';
+select * from test2 where t ~ ' z foo';
extern TRGM *generate_wildcard_trgm(const char *str, int slen);
extern float4 cnt_sml(TRGM *trg1, TRGM *trg2);
extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2);
-extern TRGM *createTrgmNFA(text *text_re, TrgmPackedGraph **graph,
- Oid collation);
+extern bool *trgm_presence_map(TRGM *query, TRGM *key);
+extern TRGM *createTrgmNFA(text *text_re, Oid collation,
+ TrgmPackedGraph **graph, MemoryContext rcontext);
extern bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check);
#endif /* __TRGM_H__ */
#endif
/* FALL THRU */
case RegExpStrategyNumber:
- trg = createTrgmNFA(val, &graph, PG_GET_COLLATION());
+ trg = createTrgmNFA(val, PG_GET_COLLATION(),
+ &graph, CurrentMemoryContext);
if (trg && ARRNELEM(trg) > 0)
{
/*
#include "access/skey.h"
+typedef struct
+{
+ /*
+ * Per-call-site cache used by gtrgm_consistent (kept in fn_extra).
+ *
+ * most recent inputs to gtrgm_consistent; the cached entry is reused only
+ * when both the strategy number and the raw query bytes match
+ */
+ StrategyNumber strategy;
+ text *query;
+ /* extracted trigrams for query, or NULL if none were extracted */
+ TRGM *trigrams;
+ /* if a regex operator, the extracted graph; otherwise left NULL */
+ TrgmPackedGraph *graph;
+
+ /*
+ * The "query" and "trigrams" are stored in the same palloc block as this
+ * cache struct, at MAXALIGN'ed offsets. The graph however isn't, so
+ * freeing the cache block does not by itself release the graph.
+ */
+} gtrgm_consistent_cache;
+
+#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
+
+
PG_FUNCTION_INFO_V1(gtrgm_in);
Datum gtrgm_in(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(gtrgm_picksplit);
Datum gtrgm_picksplit(PG_FUNCTION_ARGS);
-#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
-
/* Number of one-bits in an unsigned byte */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
TRGM *qtrg;
bool res;
Size querysize = VARSIZE(query);
- char *cache = (char *) fcinfo->flinfo->fn_extra,
- *cachedQuery = cache + MAXALIGN(sizeof(StrategyNumber));
+ gtrgm_consistent_cache *cache;
/*
- * Store both the strategy number and extracted trigrams in cache, because
- * trigram extraction is relatively CPU-expensive. We must include
- * strategy number because trigram extraction depends on strategy.
+ * We keep the extracted trigrams in cache, because trigram extraction is
+ * relatively CPU-expensive. When trying to reuse a cached value, check
+ * strategy number not just query itself, because trigram extraction
+ * depends on strategy.
*
- * The cached structure contains the strategy number, then the input query
- * (starting at a MAXALIGN boundary), then the TRGM value (also starting
- * at a MAXALIGN boundary).
+ * The cached structure is a single palloc chunk containing the
+ * gtrgm_consistent_cache header, then the input query (starting at a
+ * MAXALIGN boundary), then the TRGM value (also starting at a MAXALIGN
+ * boundary). However we don't try to include the regex graph (if any) in
+ * that struct. (XXX currently, this approach can leak regex graphs
+ * across index rescans. Not clear if that's worth fixing.)
*/
+ cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
if (cache == NULL ||
- strategy != *((StrategyNumber *) cache) ||
- VARSIZE(cachedQuery) != querysize ||
- memcmp(cachedQuery, query, querysize) != 0)
+ cache->strategy != strategy ||
+ VARSIZE(cache->query) != querysize ||
+ memcmp((char *) cache->query, (char *) query, querysize) != 0)
{
- char *newcache;
+ gtrgm_consistent_cache *newcache;
+ TrgmPackedGraph *graph = NULL;
+ Size qtrgsize;
switch (strategy)
{
qtrg = generate_wildcard_trgm(VARDATA(query),
querysize - VARHDRSZ);
break;
+ case RegExpICaseStrategyNumber:
+#ifndef IGNORECASE
+ elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
+#endif
+ /* FALL THRU */
+ case RegExpStrategyNumber:
+ qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
+ &graph, fcinfo->flinfo->fn_mcxt);
+ /* just in case an empty array is returned ... */
+ if (qtrg && ARRNELEM(qtrg) <= 0)
+ {
+ pfree(qtrg);
+ qtrg = NULL;
+ }
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d", strategy);
qtrg = NULL; /* keep compiler quiet */
break;
}
- newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
- MAXALIGN(sizeof(StrategyNumber)) +
- MAXALIGN(querysize) +
- VARSIZE(qtrg));
- cachedQuery = newcache + MAXALIGN(sizeof(StrategyNumber));
+ qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
- *((StrategyNumber *) newcache) = strategy;
- memcpy(cachedQuery, query, querysize);
- memcpy(cachedQuery + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
+ newcache = (gtrgm_consistent_cache *)
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ MAXALIGN(sizeof(gtrgm_consistent_cache)) +
+ MAXALIGN(querysize) +
+ qtrgsize);
+
+ newcache->strategy = strategy;
+ newcache->query = (text *)
+ ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
+ memcpy((char *) newcache->query, (char *) query, querysize);
+ if (qtrg)
+ {
+ newcache->trigrams = (TRGM *)
+ ((char *) newcache->query + MAXALIGN(querysize));
+ memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize);
+ /* release qtrg in case it was made in fn_mcxt */
+ pfree(qtrg);
+ }
+ else
+ newcache->trigrams = NULL;
+ newcache->graph = graph;
if (cache)
pfree(cache);
- fcinfo->flinfo->fn_extra = newcache;
+ fcinfo->flinfo->fn_extra = (void *) newcache;
+ cache = newcache;
}
- qtrg = (TRGM *) (cachedQuery + MAXALIGN(querysize));
+ qtrg = cache->trigrams;
switch (strategy)
{
}
}
break;
+ case RegExpICaseStrategyNumber:
+#ifndef IGNORECASE
+ elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
+#endif
+ /* FALL THRU */
+ case RegExpStrategyNumber:
+ /* Regexp search is inexact */
+ *recheck = true;
+
+ /* Check regex match as much as we can with available info */
+ if (qtrg)
+ {
+ if (GIST_LEAF(entry))
+ { /* all leafs contains orig trgm */
+ bool *check;
+
+ check = trgm_presence_map(qtrg, key);
+ res = trigramsMatchGraph(cache->graph, check);
+ pfree(check);
+ }
+ else if (ISALLTRUE(key))
+ { /* non-leaf contains signature */
+ res = true;
+ }
+ else
+ { /* non-leaf contains signature */
+ int32 k,
+ tmp = 0,
+ len = ARRNELEM(qtrg);
+ trgm *ptr = GETARR(qtrg);
+ BITVECP sign = GETSIGN(key);
+
+ /* descend only if at least one trigram is present */
+ res = false;
+ for (k = 0; k < len; k++)
+ {
+ CPTRGM(((char *) &tmp), ptr + k);
+ if (GETBIT(sign, HASHVAL(tmp)))
+ {
+ res = true;
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* trigram-free query must be rechecked everywhere */
+ res = true;
+ }
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d", strategy);
res = false; /* keep compiler quiet */
return true;
}
+/*
+ * Return a palloc'd boolean array showing, for each trigram in "query",
+ * whether it is present in the trigram array "key".
+ * This relies on the "key" array being sorted, but "query" need not be.
+ *
+ * The result has one entry per query trigram, in the same order as the
+ * trigrams appear in "query": result[i] is true iff the i-th query trigram
+ * was found in "key". The array is allocated with palloc0, so entries for
+ * trigrams that are not found stay false. The caller owns the result and
+ * may pfree it when done.
+ */
+bool *
+trgm_presence_map(TRGM *query, TRGM *key)
+{
+ bool *result;
+ trgm *ptrq = GETARR(query),
+ *ptrk = GETARR(key);
+ int lenq = ARRNELEM(query),
+ lenk = ARRNELEM(key),
+ i;
+
+ result = (bool *) palloc0(lenq * sizeof(bool)); /* all entries start false */
+
+ /*
+ * for each query trigram, do a binary search in the key array
+ *
+ * ptrq walks the query trigrams in step with i; the search window in
+ * the key array is the half-open index range [lo, hi).
+ */
+ for (i = 0; i < lenq; i++)
+ {
+ int lo = 0;
+ int hi = lenk;
+
+ while (lo < hi)
+ {
+ int mid = (lo + hi) / 2;
+ int res = CMPTRGM(ptrq, ptrk + mid);
+
+ if (res < 0)
+ hi = mid; /* query trigram sorts before key[mid] */
+ else if (res > 0)
+ lo = mid + 1; /* query trigram sorts after key[mid] */
+ else
+ {
+ result[i] = true; /* exact match: trigram i is present */
+ break;
+ }
+ }
+ ptrq++; /* advance to the next query trigram */
+ }
+
+ return result;
+}
+
Datum
similarity(PG_FUNCTION_ARGS)
{
*
* Returns an array of trigrams required by the regular expression, or NULL if
* the regular expression was too complex to analyze. In addition, a packed
- * graph representation of the regex is returned into *graph.
+ * graph representation of the regex is returned into *graph. The results
+ * must be allocated in rcontext (which might or might not be the current
+ * context).
*/
TRGM *
-createTrgmNFA(text *text_re, TrgmPackedGraph **graph, Oid collation)
+createTrgmNFA(text *text_re, Oid collation,
+ TrgmPackedGraph **graph, MemoryContext rcontext)
{
TRGM *trg;
regex_t regex;
/*
* This processing generates a great deal of cruft, which we'd like to
- * clean up before returning (since this function is normally called in a
+ * clean up before returning (since this function may be called in a
* query-lifespan memory context). Make a temp context we can work in so
- * that cleanup is easy. Note that the returned data structures must be
- * allocated in caller's context, however.
+ * that cleanup is easy.
*/
tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
"createTrgmNFA temporary context",
*/
PG_TRY();
{
- trg = createTrgmNFAInternal(&regex, graph, oldcontext);
+ trg = createTrgmNFAInternal(&regex, graph, rcontext);
}
PG_CATCH();
{
</para>
<para>
- Beginning in <productname>PostgreSQL</> 9.3, <filename>pg_trgm</filename>
- GIN indexes also support index searches for regular-expression matches
+ Beginning in <productname>PostgreSQL</> 9.3, these index types also support
+ index searches for regular-expression matches
(<literal>~</> and <literal>~*</> operators), for example
<programlisting>
SELECT * FROM test_trgm WHERE t ~ '(foo|bar)';