1 /*-------------------------------------------------------------------------
4 * private declarations for GiST -- declarations related to the
5 * internal implementation of GiST, not the public API
7 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/include/access/gist_private.h
12 *-------------------------------------------------------------------------
14 #ifndef GIST_PRIVATE_H
15 #define GIST_PRIVATE_H
17 #include "access/gist.h"
18 #include "access/itup.h"
20 #include "storage/bufmgr.h"
21 #include "storage/buffile.h"
22 #include "utils/rbtree.h"
23 #include "utils/hsearch.h"
25 /* Buffer lock modes */
26 #define GIST_SHARE BUFFER_LOCK_SHARE
27 #define GIST_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE
28 #define GIST_UNLOCK BUFFER_LOCK_UNLOCK
/*
 * Accessor macros for a node buffer page.
 *
 * Each macro taking "nbp" expects a pointer to a structure with a
 * "freespace" member.  The argument is parenthesized in every expansion so
 * that any pointer-valued expression (e.g. "p + 1" or "&pages[i]") can be
 * passed without operator-precedence surprises.
 */

/* MAXALIGN'd offset of the tupledata member within GISTNodeBufferPage */
#define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(GISTNodeBufferPage, tupledata))
/* Returns free space in node buffer page */
#define PAGE_FREE_SPACE(nbp) ((nbp)->freespace)
/* Checks if node buffer page is empty (free space covers the whole data area) */
#define PAGE_IS_EMPTY(nbp) ((nbp)->freespace == BLCKSZ - BUFFER_PAGE_DATA_OFFSET)
/* Checks if node buffer page doesn't contain sufficient space for index tuple */
#define PAGE_NO_SPACE(nbp, itup) (PAGE_FREE_SPACE(nbp) < \
								  MAXALIGN(IndexTupleSize(itup)))
47 * GISTSTATE: information needed for any GiST index operation
49 * This struct retains call info for the index's opclass-specific support
50 * functions (per index column), plus the index's tuple descriptor.
52 * scanCxt holds the GISTSTATE itself as well as any data that lives for the
53 * lifetime of the index operation. We pass this to the support functions
54 * via fn_mcxt, so that they can store scan-lifespan data in it. The
55 * functions are invoked in tempCxt, which is typically short-lifespan
56 * (that is, it's reset after each tuple). However, tempCxt can be the same
57 * as scanCxt if we're not bothering with per-tuple context resets.
59 typedef struct GISTSTATE
61 MemoryContext scanCxt; /* context for scan-lifespan data */
62 MemoryContext tempCxt; /* short-term context for calling functions */
64 TupleDesc tupdesc; /* index's tuple descriptor */
66 FmgrInfo consistentFn[INDEX_MAX_KEYS];
67 FmgrInfo unionFn[INDEX_MAX_KEYS];
68 FmgrInfo compressFn[INDEX_MAX_KEYS];
69 FmgrInfo decompressFn[INDEX_MAX_KEYS];
70 FmgrInfo penaltyFn[INDEX_MAX_KEYS];
71 FmgrInfo picksplitFn[INDEX_MAX_KEYS];
72 FmgrInfo equalFn[INDEX_MAX_KEYS];
73 FmgrInfo distanceFn[INDEX_MAX_KEYS];
75 /* Collations to pass to the support functions */
76 Oid supportCollation[INDEX_MAX_KEYS];
81 * During a GiST index search, we must maintain a queue of unvisited items,
82 * which can be either individual heap tuples or whole index pages. If it
83 * is an ordered search, the unvisited items should be visited in distance
84 * order. Unvisited items at the same distance should be visited in
85 * depth-first order, that is heap items first, then lower index pages, then
86 * upper index pages; this rule avoids doing extra work during a search that
87 * ends early due to LIMIT.
89 * To perform an ordered search, we use an RBTree to manage the distance-order
90 * queue. Each GISTSearchTreeItem stores all unvisited items of the same
91 * distance; they are GISTSearchItems chained together via their next fields.
93 * In a non-ordered search (no order-by operators), the RBTree degenerates
94 * to a single item, which we use as a queue of unvisited index pages only.
95 * In this case matched heap items from the current index leaf page are
96 * remembered in GISTScanOpaqueData.pageData[] and returned directly from
97 * there, instead of building a separate GISTSearchItem for each one.
100 /* Individual heap tuple to be visited */
101 typedef struct GISTSearchHeapItem
103 ItemPointerData heapPtr;
104 bool recheck; /* T if quals must be rechecked */
105 } GISTSearchHeapItem;
107 /* Unvisited item, either index page or heap tuple */
108 typedef struct GISTSearchItem
110 struct GISTSearchItem *next; /* list link */
111 BlockNumber blkno; /* index page number, or InvalidBlockNumber */
114 GistNSN parentlsn; /* parent page's LSN, if index page */
115 /* we must store parentlsn to detect whether a split occurred */
116 GISTSearchHeapItem heap; /* heap info, if heap tuple */
120 #define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
123 * Within a GISTSearchTreeItem's chain, heap items always appear before
124 * index-page items, since we want to visit heap items first. lastHeap points
125 * to the last heap item in the chain, or is NULL if there are none.
127 typedef struct GISTSearchTreeItem
129 RBNode rbnode; /* this is an RBTree item */
130 GISTSearchItem *head; /* first chain member */
131 GISTSearchItem *lastHeap; /* last heap-tuple member, if any */
132 double distances[1]; /* array with numberOfOrderBys entries */
133 } GISTSearchTreeItem;
135 #define GSTIHDRSZ offsetof(GISTSearchTreeItem, distances)
138 * GISTScanOpaqueData: private state for a scan of a GiST index
140 typedef struct GISTScanOpaqueData
142 GISTSTATE *giststate; /* index information, see above */
143 RBTree *queue; /* queue of unvisited items */
144 MemoryContext queueCxt; /* context holding the queue */
145 bool qual_ok; /* false if qual can never be satisfied */
146 bool firstCall; /* true until first gistgettuple call */
148 GISTSearchTreeItem *curTreeItem; /* current queue item, if any */
150 /* pre-allocated workspace arrays */
151 GISTSearchTreeItem *tmpTreeItem; /* workspace to pass to rb_insert */
152 double *distances; /* output area for gistindex_keytest */
154 /* In a non-ordered search, returnable heap items are stored here: */
155 GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
156 OffsetNumber nPageData; /* number of valid items in array */
157 OffsetNumber curPageData; /* next item to return */
158 } GISTScanOpaqueData;
160 typedef GISTScanOpaqueData *GISTScanOpaque;
165 #define XLOG_GIST_PAGE_UPDATE 0x00
166 /* #define XLOG_GIST_NEW_ROOT 0x20 */ /* not used anymore */
167 #define XLOG_GIST_PAGE_SPLIT 0x30
168 /* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */
169 #define XLOG_GIST_CREATE_INDEX 0x50
170 #define XLOG_GIST_PAGE_DELETE 0x60
172 typedef struct gistxlogPageUpdate
178 * If this operation completes a page split, by inserting a downlink for
179 * the split page, leftchild points to the left half of the split.
181 BlockNumber leftchild;
183 /* number of deleted offsets */
187 * follow: 1. todelete OffsetNumbers 2. tuples to insert
189 } gistxlogPageUpdate;
191 typedef struct gistxlogPageSplit
194 BlockNumber origblkno; /* splitted page */
195 BlockNumber origrlink; /* rightlink of the page before split */
196 GistNSN orignsn; /* NSN of the page before split */
197 bool origleaf; /* was splitted page a leaf page? */
199 BlockNumber leftchild; /* like in gistxlogPageUpdate */
200 uint16 npage; /* # of pages in the split */
201 bool markfollowright; /* set F_FOLLOW_RIGHT flags */
204 * follow: 1. gistxlogPage and array of IndexTupleData per page
208 typedef struct gistxlogPage
211 int num; /* number of index tuples following */
214 typedef struct gistxlogPageDelete
218 } gistxlogPageDelete;
220 /* SplitedPageLayout - gistSplit function result */
221 typedef struct SplitedPageLayout
224 IndexTupleData *list;
226 IndexTuple itup; /* union key for page */
227 Page page; /* to operate */
228 Buffer buffer; /* to write after all proceed */
230 struct SplitedPageLayout *next;
234 * GISTInsertStack used for locking buffers and transfer arguments during
237 typedef struct GISTInsertStack
245 * log sequence number from page->lsn to recognize page update and compare
246 * it with page's nsn to recognize page split
250 /* offset of the downlink in the parent page, that points to this page */
251 OffsetNumber downlinkoffnum;
253 /* pointer to parent */
254 struct GISTInsertStack *parent;
257 typedef struct GistSplitVector
259 GIST_SPLITVEC splitVector; /* to/from PickSplit method */
261 Datum spl_lattr[INDEX_MAX_KEYS]; /* Union of subkeys in
263 bool spl_lisnull[INDEX_MAX_KEYS];
265 Datum spl_rattr[INDEX_MAX_KEYS]; /* Union of subkeys in
267 bool spl_risnull[INDEX_MAX_KEYS];
269 bool *spl_equiv; /* equivalent tuples which can be freely
270 * distributed between left and right pages */
276 Size freespace; /* free space to be left */
278 GISTInsertStack *stack;
281 /* root page of a gist index */
282 #define GIST_ROOT_BLKNO 0
285 * Before PostgreSQL 9.1, we used to rely on so-called "invalid tuples" on inner
286 * pages to finish crash recovery of incomplete page splits. If a crash
287 * happened in the middle of a page split, so that the downlink pointers were
288 * not yet inserted, crash recovery inserted a special downlink pointer. The
289 * semantics of an invalid tuple was that if you encounter one in a scan,
290 * it must always be followed, because we don't know if the tuples on the
291 * child page match or not.
293 * We no longer create such invalid tuples, we now mark the left-half of such
294 * an incomplete split with the F_FOLLOW_RIGHT flag instead, and finish the
295 * split properly the next time we need to insert on that page. To retain
296 * on-disk compatibility for the sake of pg_upgrade, we still store 0xffff as
297 * the offset number of all inner tuples. If we encounter any invalid tuples
298 * with 0xfffe during insertion, we throw an error, though scans still handle
299 * them. You should only encounter invalid tuples if you pg_upgrade a pre-9.1
300 * gist index which already has invalid tuples in it because of a crash. That
301 * should be rare, and you are recommended to REINDEX anyway if you have any
302 * invalid tuples in an index, so throwing an error is as far as we go with
305 #define TUPLE_IS_VALID 0xffff
306 #define TUPLE_IS_INVALID 0xfffe
308 #define GistTupleIsInvalid(itup) ( ItemPointerGetOffsetNumber( &((itup)->t_tid) ) == TUPLE_IS_INVALID )
309 #define GistTupleSetValid(itup) ItemPointerSetOffsetNumber( &((itup)->t_tid), TUPLE_IS_VALID )
315 * A buffer attached to an internal node, used when building an index in
320 BlockNumber nodeBlocknum; /* index block # this buffer is for */
321 int32 blocksCount; /* current # of blocks occupied by buffer */
323 BlockNumber pageBlocknum; /* temporary file block # */
324 GISTNodeBufferPage *pageBuffer; /* in-memory buffer page */
326 /* is this buffer queued for emptying? */
327 bool queuedForEmptying;
329 /* is this a temporary copy, not in the hash table? */
332 int level; /* 0 == leaf */
336 * Does specified level have buffers? (Beware of multiple evaluation of
339 #define LEVEL_HAS_BUFFERS(nlevel, gfbb) \
340 ((nlevel) != 0 && (nlevel) % (gfbb)->levelStep == 0 && \
341 (nlevel) != (gfbb)->rootlevel)
343 /* Is specified buffer at least half-filled (should be queued for emptying)? */
344 #define BUFFER_HALF_FILLED(nodeBuffer, gfbb) \
345 ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer / 2)
348 * Is specified buffer full? Our buffers can actually grow indefinitely,
349 * beyond the "maximum" size, so this just means whether the buffer has grown
350 * beyond the nominal maximum size.
352 #define BUFFER_OVERFLOWED(nodeBuffer, gfbb) \
353 ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer)
356 * Data structure with general information about build buffers.
358 typedef struct GISTBuildBuffers
360 /* Persistent memory context for the buffers and metadata. */
361 MemoryContext context;
363 BufFile *pfile; /* Temporary file to store buffers in */
364 long nFileBlocks; /* Current size of the temporary file */
367 * resizable array of free blocks.
370 int nFreeBlocks; /* # of currently free blocks in the array */
371 int freeBlocksLen; /* current allocated length of the array */
373 /* Hash for buffers by block number */
374 HTAB *nodeBuffersTab;
376 /* List of buffers scheduled for emptying */
377 List *bufferEmptyingQueue;
380 * Parameters to the buffering build algorithm. levelStep determines which
381 * levels in the tree have buffers, and pagesPerBuffer determines how
382 * large each buffer is.
387 /* Array of lists of buffers on each level, for final emptying */
388 List **buffersOnLevels;
389 int buffersOnLevelsLen;
392 * Dynamically-sized array of buffers that currently have their last page
393 * loaded in main memory.
395 GISTNodeBuffer **loadedBuffers;
396 int loadedBuffersCount; /* # of entries in loadedBuffers */
397 int loadedBuffersLen; /* allocated size of loadedBuffers */
399 /* Level of the current root node (= height of the index tree - 1) */
404 * Storage type for GiST's reloptions
406 typedef struct GiSTOptions
408 int32 vl_len_; /* varlena header (do not touch directly!) */
409 int fillfactor; /* page fill factor in percent (0..100) */
410 int bufferingModeOffset; /* use buffering build? */
414 extern Datum gistbuildempty(PG_FUNCTION_ARGS);
415 extern Datum gistinsert(PG_FUNCTION_ARGS);
416 extern MemoryContext createTempGistContext(void);
417 extern GISTSTATE *initGISTstate(Relation index);
418 extern void freeGISTstate(GISTSTATE *giststate);
419 extern void gistdoinsert(Relation r,
422 GISTSTATE *GISTstate);
424 /* A List of these is returned from gistplacetopage() in *splitinfo */
427 Buffer buf; /* the split page "half" */
428 IndexTuple downlink; /* downlink for this half. */
431 extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
433 IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
438 extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
439 int len, GISTSTATE *giststate);
442 extern void gist_redo(XLogRecPtr lsn, XLogRecord *record);
443 extern void gist_desc(StringInfo buf, uint8 xl_info, char *rec);
444 extern void gist_xlog_startup(void);
445 extern void gist_xlog_cleanup(void);
447 extern XLogRecPtr gistXLogUpdate(RelFileNode node, Buffer buffer,
448 OffsetNumber *todelete, int ntodelete,
449 IndexTuple *itup, int ntup,
452 extern XLogRecPtr gistXLogSplit(RelFileNode node,
453 BlockNumber blkno, bool page_is_leaf,
454 SplitedPageLayout *dist,
455 BlockNumber origrlink, GistNSN oldnsn,
456 Buffer leftchild, bool markfollowright);
459 extern Datum gistgettuple(PG_FUNCTION_ARGS);
460 extern Datum gistgetbitmap(PG_FUNCTION_ARGS);
464 #define GiSTPageSize \
465 ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)) )
467 #define GIST_MIN_FILLFACTOR 10
468 #define GIST_DEFAULT_FILLFACTOR 90
470 extern Datum gistoptions(PG_FUNCTION_ARGS);
471 extern bool gistfitpage(IndexTuple *itvec, int len);
472 extern bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace);
473 extern void gistcheckpage(Relation rel, Buffer buf);
474 extern Buffer gistNewBuffer(Relation r);
475 extern void gistfillbuffer(Page page, IndexTuple *itup, int len,
477 extern IndexTuple *gistextractpage(Page page, int *len /* out */ );
478 extern IndexTuple *gistjoinvector(
479 IndexTuple *itvec, int *len,
480 IndexTuple *additvec, int addlen);
481 extern IndexTupleData *gistfillitupvec(IndexTuple *vec, int veclen, int *memlen);
483 extern IndexTuple gistunion(Relation r, IndexTuple *itvec,
484 int len, GISTSTATE *giststate);
485 extern IndexTuple gistgetadjusted(Relation r,
488 GISTSTATE *giststate);
489 extern IndexTuple gistFormTuple(GISTSTATE *giststate,
490 Relation r, Datum *attdata, bool *isnull, bool newValues);
492 extern OffsetNumber gistchoose(Relation r, Page p,
494 GISTSTATE *giststate);
495 extern void gistcentryinit(GISTSTATE *giststate, int nkey,
496 GISTENTRY *e, Datum k,
498 OffsetNumber o, bool l, bool isNull);
500 extern void GISTInitBuffer(Buffer b, uint32 f);
501 extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
502 Datum k, Relation r, Page pg, OffsetNumber o,
503 bool l, bool isNull);
505 extern float gistpenalty(GISTSTATE *giststate, int attno,
506 GISTENTRY *key1, bool isNull1,
507 GISTENTRY *key2, bool isNull2);
508 extern void gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey,
509 Datum *attr, bool *isnull);
510 extern bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b);
511 extern void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
512 OffsetNumber o, GISTENTRY *attdata, bool *isnull);
514 extern void gistMakeUnionKey(GISTSTATE *giststate, int attno,
515 GISTENTRY *entry1, bool isnull1,
516 GISTENTRY *entry2, bool isnull2,
517 Datum *dst, bool *dstisnull);
519 extern XLogRecPtr GetXLogRecPtrForTemp(void);
522 extern Datum gistbulkdelete(PG_FUNCTION_ARGS);
523 extern Datum gistvacuumcleanup(PG_FUNCTION_ARGS);
526 extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup,
527 int len, GISTSTATE *giststate,
528 GistSplitVector *v, GistEntryVector *entryvec,
532 extern Datum gistbuild(PG_FUNCTION_ARGS);
533 extern void gistValidateBufferingOption(char *value);
535 /* gistbuildbuffers.c */
536 extern GISTBuildBuffers *gistInitBuildBuffers(int pagesPerBuffer, int levelStep,
538 extern GISTNodeBuffer *gistGetNodeBuffer(GISTBuildBuffers *gfbb,
539 GISTSTATE *giststate,
540 BlockNumber blkno, int level);
541 extern void gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb,
542 GISTNodeBuffer *nodeBuffer, IndexTuple item);
543 extern bool gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb,
544 GISTNodeBuffer *nodeBuffer, IndexTuple *item);
545 extern void gistFreeBuildBuffers(GISTBuildBuffers *gfbb);
546 extern void gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb,
547 GISTSTATE *giststate, Relation r,
548 int level, Buffer buffer,
550 extern void gistUnloadNodeBuffers(GISTBuildBuffers *gfbb);
552 #endif /* GIST_PRIVATE_H */