]> granicus.if.org Git - postgresql/commitdiff
BRIN de-summarization
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Sat, 1 Apr 2017 19:10:04 +0000 (16:10 -0300)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Sat, 1 Apr 2017 19:10:04 +0000 (16:10 -0300)
When the BRIN summary tuple for a page range becomes too "wide" for the
values actually stored in the table (because the tuples that were
present originally are no longer present due to updates or deletes), it
can be useful to remove the outdated summary tuple, so that a future
summarization can install a tighter summary.

This commit introduces a SQL-callable interface to do so.

Author: Álvaro Herrera
Reviewed-by: Eiji Seki
Discussion: https://postgr.es/m/20170228045643.n2ri74ara4fhhfxf@alvherre.pgsql

12 files changed:
doc/src/sgml/brin.sgml
doc/src/sgml/func.sgml
src/backend/access/brin/brin.c
src/backend/access/brin/brin_revmap.c
src/backend/access/brin/brin_xlog.c
src/backend/access/rmgrdesc/brindesc.c
src/include/access/brin_revmap.h
src/include/access/brin_xlog.h
src/include/catalog/catversion.h
src/include/catalog/pg_proc.h
src/test/regress/expected/brin.out
src/test/regress/sql/brin.sql

index 5140a38baad44f73d877eb25a9b902eaa1c6eabf..ad11109775f396dccc9ff27c96dc8bf7f88ecf90 100644 (file)
    or by automatic summarization executed by autovacuum, as insertions
    occur.  (This last trigger is disabled by default and can be enabled
    with the <literal>autosummarize</literal> parameter.)
+   Conversely, a range can be de-summarized using the
+   <function>brin_desummarize_range(regclass, bigint)</function> function,
+   which is useful when the index tuple is no longer a very good
+   representation because the existing values have changed.
   </para>
 
  </sect2>
index 25c18d107c82a5a868b3a8e5c17dd5f803211bcc..19329dd1033b0d08dfc072285a7f15d6d9d7e0ea 100644 (file)
@@ -19660,6 +19660,14 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
     <primary>gin_clean_pending_list</primary>
    </indexterm>
 
+   <indexterm>
+    <primary>brin_summarize_range</primary>
+   </indexterm>
+
+   <indexterm>
+    <primary>brin_desummarize_range</primary>
+   </indexterm>
+
    <para>
     <xref linkend="functions-admin-index-table"> shows the functions
     available for index maintenance tasks.
@@ -19690,6 +19698,13 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
        <entry><type>integer</type></entry>
        <entry>summarize the page range covering the given block, if not already summarized</entry>
       </row>
+      <row>
+       <entry>
+        <literal><function>brin_desummarize_range(<parameter>index</> <type>regclass</>, <parameter>blockNumber</> <type>bigint</type>)</function></literal>
+       </entry>
+       <entry><type>void</type></entry>
+       <entry>de-summarize the page range covering the given block, if summarized</entry>
+      </row>
       <row>
        <entry>
         <literal><function>gin_clean_pending_list(<parameter>index</> <type>regclass</>)</function></literal>
index 86e73b624278c103c186a8dfd20e9d668ef14358..649f3488c20d5211081108ca024b867bc9c9cfe6 100644 (file)
@@ -908,6 +908,80 @@ brin_summarize_range(PG_FUNCTION_ARGS)
        PG_RETURN_INT32((int32) numSummarized);
 }
 
+/*
+ * brin_desummarize_range(index regclass, blockNumber bigint)
+ *
+ * SQL-callable entry point that marks the page range containing the given
+ * heap block as no longer summarized.  The block number must lie within
+ * 0 .. MaxBlockNumber, the first argument must name a BRIN index, and the
+ * calling user must own that index.  Returns void.
+ */
+Datum
+brin_desummarize_range(PG_FUNCTION_ARGS)
+{
+       Oid             idxOid = PG_GETARG_OID(0);
+       int64   blkArg = PG_GETARG_INT64(1);
+       BlockNumber targetBlk;
+       Oid             tableOid;
+       Relation tableRel = NULL;
+       Relation idxRel;
+
+       /* Reject block numbers outside 0 .. MaxBlockNumber */
+       if (blkArg < 0 || blkArg > MaxBlockNumber)
+       {
+               char       *blkText = psprintf(INT64_FORMAT, blkArg);
+
+               ereport(ERROR,
+                               (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                                errmsg("block number out of range: %s", blkText)));
+       }
+       targetBlk = (BlockNumber) blkArg;
+
+       /*
+        * Lock order is table first, index second, to avoid deadlocks.  The
+        * lookup of the parent table is done in "missing ok" mode: when the
+        * given OID is not an index we leave tableRel as NULL and let the
+        * is-it-a-BRIN-index test below produce the (more helpful) complaint.
+        */
+       tableOid = IndexGetRelation(idxOid, true);
+       if (OidIsValid(tableOid))
+               tableRel = heap_open(tableOid, ShareUpdateExclusiveLock);
+
+       idxRel = index_open(idxOid, ShareUpdateExclusiveLock);
+
+       /* Anything other than a BRIN index is rejected */
+       if (!(idxRel->rd_rel->relkind == RELKIND_INDEX &&
+                 idxRel->rd_rel->relam == BRIN_AM_OID))
+               ereport(ERROR,
+                               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                errmsg("\"%s\" is not a BRIN index",
+                                               RelationGetRelationName(idxRel))));
+
+       /* Only the index owner may do this (same privilege level as VACUUM) */
+       if (!pg_class_ownercheck(idxOid, GetUserId()))
+               aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
+                                          RelationGetRelationName(idxRel));
+
+       /*
+        * The first IndexGetRelation call ran before we held any lock, so a
+        * concurrent drop/recreate of the index could have handed us the wrong
+        * table.  Now that the index is locked, look again and compare.
+        */
+       if (tableRel == NULL || IndexGetRelation(idxOid, false) != tableOid)
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNDEFINED_TABLE),
+                                errmsg("could not open parent table of index %s",
+                                               RelationGetRelationName(idxRel))));
+
+       /* the revmap code does the real work; repeat while it asks for a retry */
+       while (!brinRevmapDesummarizeRange(idxRel, targetBlk))
+               ;
+
+       relation_close(idxRel, ShareUpdateExclusiveLock);
+       relation_close(tableRel, ShareUpdateExclusiveLock);
+
+       PG_RETURN_VOID();
+}
+
 /*
  * Build a BrinDesc used to create or scan a BRIN index
  */
index 5d45b48fd94490ec57a2ea1e9588e289b4002d78..35e53a2bac27ca2b35bf51ea928f1530cfea6937 100644 (file)
@@ -168,9 +168,12 @@ brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange,
        iptr = (ItemPointerData *) contents->rm_tids;
        iptr += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);
 
-       ItemPointerSet(iptr,
-                                  ItemPointerGetBlockNumber(&tid),
-                                  ItemPointerGetOffsetNumber(&tid));
+       if (ItemPointerIsValid(&tid))
+               ItemPointerSet(iptr,
+                                          ItemPointerGetBlockNumber(&tid),
+                                          ItemPointerGetOffsetNumber(&tid));
+       else
+               ItemPointerSetInvalid(iptr);
 }
 
 /*
@@ -304,6 +307,137 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
        return NULL;
 }
 
+/*
+ * Delete an index tuple, marking a page range as unsummarized.
+ *
+ * Looks up the revmap entry for the range containing heapBlk.  If the entry
+ * points to a summary tuple, the entry is set to invalid and the tuple is
+ * removed from its regular page; both page changes are covered by a single
+ * XLOG_BRIN_DESUMMARIZE record when the index needs WAL.  If the revmap page
+ * does not exist or the entry is already invalid, nothing is changed.
+ *
+ * Index must be locked in ShareUpdateExclusiveLock mode.
+ *
+ * Return FALSE if caller should retry (the page the revmap pointed to turned
+ * out not to be a regular page); TRUE otherwise.
+ */
+bool
+brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
+{
+       BrinRevmap *revmap;
+       BlockNumber     pagesPerRange;
+       RevmapContents *contents;
+       ItemPointerData *iptr;
+       ItemPointerData invalidIptr;
+       BlockNumber     revmapBlk;
+       Buffer          revmapBuf;
+       Buffer          regBuf;
+       Page            revmapPg;
+       Page            regPg;
+       OffsetNumber revmapOffset;
+       OffsetNumber regOffset;
+       ItemId          lp;
+       BrinTuple  *tup;
+
+       revmap = brinRevmapInitialize(idxrel, &pagesPerRange, NULL);
+
+       revmapBlk = revmap_get_blkno(revmap, heapBlk);
+       if (!BlockNumberIsValid(revmapBlk))
+       {
+               /* revmap page doesn't exist: range not summarized, we're done */
+               brinRevmapTerminate(revmap);
+               return true;
+       }
+
+       /* Lock the revmap page, obtain the index tuple pointer from it */
+       revmapBuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
+       revmapPg = BufferGetPage(revmapBuf);
+       revmapOffset = HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);
+
+       contents = (RevmapContents *) PageGetContents(revmapPg);
+       iptr = contents->rm_tids;
+       iptr += revmapOffset;
+
+       if (!ItemPointerIsValid(iptr))
+       {
+               /* no index tuple: range not summarized, we're done */
+               LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
+               brinRevmapTerminate(revmap);
+               return true;
+       }
+
+       regBuf = ReadBuffer(idxrel, ItemPointerGetBlockNumber(iptr));
+       LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
+       regPg = BufferGetPage(regBuf);
+
+       /* if this is no longer a regular page, tell caller to start over */
+       if (!BRIN_IS_REGULAR_PAGE(regPg))
+       {
+               /*
+                * NOTE(review): revmapBuf is only unlocked, as on every other exit
+                * path; presumably its pin is dropped by brinRevmapTerminate --
+                * confirm.
+                */
+               LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
+
+               /*
+                * regBuf was pinned by our own ReadBuffer call above, so we must
+                * drop the pin as well as the lock; merely unlocking it would leak
+                * one buffer pin per retry.
+                */
+               UnlockReleaseBuffer(regBuf);
+               brinRevmapTerminate(revmap);
+               return false;
+       }
+
+       regOffset = ItemPointerGetOffsetNumber(iptr);
+       if (regOffset > PageGetMaxOffsetNumber(regPg))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDEX_CORRUPTED),
+                                errmsg("corrupted BRIN index: inconsistent range map")));
+
+       lp = PageGetItemId(regPg, regOffset);
+       if (!ItemIdIsUsed(lp))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDEX_CORRUPTED),
+                                errmsg("corrupted BRIN index: inconsistent range map")));
+       tup = (BrinTuple *) PageGetItem(regPg, lp);
+       /* XXX apply sanity checks?  Might as well delete a bogus tuple ... */
+
+       /*
+        * We're only removing data, not reading it, so there's no need to
+        * TestForOldSnapshot here.
+        */
+
+       /*
+        * Because of SUE lock, this function shouldn't run concurrently with
+        * summarization.  Placeholder tuples can only exist as leftovers from
+        * crashed summarization, so if we detect any, we complain but proceed.
+        */
+       if (BrinTupleIsPlaceholder(tup))
+               ereport(WARNING,
+                               (errmsg("leftover placeholder tuple detected in BRIN index \"%s\", deleting",
+                                               RelationGetRelationName(idxrel))));
+
+       START_CRIT_SECTION();
+
+       /* Invalidate the revmap entry, then remove the tuple it pointed to */
+       ItemPointerSetInvalid(&invalidIptr);
+       brinSetHeapBlockItemptr(revmapBuf, revmap->rm_pagesPerRange, heapBlk,
+                                                       invalidIptr);
+       PageIndexTupleDeleteNoCompact(regPg, regOffset);
+       /* XXX record free space in FSM? */
+
+       MarkBufferDirty(regBuf);
+       MarkBufferDirty(revmapBuf);
+
+       if (RelationNeedsWAL(idxrel))
+       {
+               xl_brin_desummarize xlrec;
+               XLogRecPtr              recptr;
+
+               /*
+                * Fill in every field of the record: redo passes pagesPerRange to
+                * brinSetHeapBlockItemptr to find the revmap slot, so it must not
+                * be left as uninitialized stack contents.
+                */
+               xlrec.pagesPerRange = revmap->rm_pagesPerRange;
+               xlrec.heapBlk = heapBlk;
+               xlrec.regOffset = regOffset;
+
+               XLogBeginInsert();
+               XLogRegisterData((char *) &xlrec, SizeOfBrinDesummarize);
+               XLogRegisterBuffer(0, revmapBuf, 0);
+               XLogRegisterBuffer(1, regBuf, REGBUF_STANDARD);
+               recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_DESUMMARIZE);
+               PageSetLSN(revmapPg, recptr);
+               PageSetLSN(regPg, recptr);
+       }
+
+       END_CRIT_SECTION();
+
+       UnlockReleaseBuffer(regBuf);
+       LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
+       brinRevmapTerminate(revmap);
+
+       return true;
+}
+
 /*
  * Given a heap block number, find the corresponding physical revmap block
  * number and return it.  If the revmap page hasn't been allocated yet, return
index f416bacc3f7e3816cb2cd22db8f533e4f1aa3dcd..8f5b5ceb3f29d4ddf96b795fc479f433c8b9084b 100644 (file)
@@ -254,6 +254,46 @@ brin_xlog_revmap_extend(XLogReaderState *record)
                UnlockReleaseBuffer(metabuf);
 }
 
+/*
+ * Replay an XLOG_BRIN_DESUMMARIZE record.
+ *
+ * Registered block 0 is the revmap page, whose entry for the record's heap
+ * block is set to invalid; registered block 1 is the regular page, from
+ * which the item at the recorded offset is deleted.
+ */
+static void
+brin_xlog_desummarize_page(XLogReaderState *record)
+{
+       xl_brin_desummarize *xlrec = (xl_brin_desummarize *) XLogRecGetData(record);
+       XLogRecPtr      lsn = record->EndRecPtr;
+       Buffer          buf;
+
+       /* Block 0: clear the revmap entry */
+       if (XLogReadBufferForRedo(record, 0, &buf) == BLK_NEEDS_REDO)
+       {
+               ItemPointerData invalidTid;
+
+               ItemPointerSetInvalid(&invalidTid);
+               brinSetHeapBlockItemptr(buf, xlrec->pagesPerRange, xlrec->heapBlk,
+                                                               invalidTid);
+
+               PageSetLSN(BufferGetPage(buf), lsn);
+               MarkBufferDirty(buf);
+       }
+       if (BufferIsValid(buf))
+               UnlockReleaseBuffer(buf);
+
+       /* Block 1: delete the now-unreferenced item from the regular page */
+       if (XLogReadBufferForRedo(record, 1, &buf) == BLK_NEEDS_REDO)
+       {
+               Page    page = BufferGetPage(buf);
+
+               PageIndexTupleDeleteNoCompact(page, xlrec->regOffset);
+
+               PageSetLSN(page, lsn);
+               MarkBufferDirty(buf);
+       }
+       if (BufferIsValid(buf))
+               UnlockReleaseBuffer(buf);
+}
+
 void
 brin_redo(XLogReaderState *record)
 {
@@ -276,6 +316,9 @@ brin_redo(XLogReaderState *record)
                case XLOG_BRIN_REVMAP_EXTEND:
                        brin_xlog_revmap_extend(record);
                        break;
+               case XLOG_BRIN_DESUMMARIZE:
+                       brin_xlog_desummarize_page(record);
+                       break;
                default:
                        elog(PANIC, "brin_redo: unknown op code %u", info);
        }
index b58cb5bde91f0af997e1f92b37f2f6dbe5a3001a..8eb5275a8b4833b26aee0d963f54887edeced780 100644 (file)
@@ -61,6 +61,13 @@ brin_desc(StringInfo buf, XLogReaderState *record)
 
                appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk);
        }
+       else if (info == XLOG_BRIN_DESUMMARIZE)
+       {
+               xl_brin_desummarize *xlrec = (xl_brin_desummarize *) rec;
+
+               appendStringInfo(buf, "pagesPerRange %u, heapBlk %u, page offset %u",
+                                                xlrec->pagesPerRange, xlrec->heapBlk, xlrec->regOffset);
+       }
 }
 
 const char *
@@ -91,6 +98,9 @@ brin_identify(uint8 info)
                case XLOG_BRIN_REVMAP_EXTEND:
                        id = "REVMAP_EXTEND";
                        break;
+               case XLOG_BRIN_DESUMMARIZE:
+                       id = "DESUMMARIZE";
+                       break;
        }
 
        return id;
index 2ec4169f6d52f2d510f7cbf5bbc22511f6b04864..7fdcf877f4b2723db4e5b29e816742e24a4f38db 100644 (file)
@@ -36,5 +36,6 @@ extern void brinSetHeapBlockItemptr(Buffer rmbuf, BlockNumber pagesPerRange,
 extern BrinTuple *brinGetTupleForHeapBlock(BrinRevmap *revmap,
                                                 BlockNumber heapBlk, Buffer *buf, OffsetNumber *off,
                                                 Size *size, int mode, Snapshot snapshot);
+extern bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk);
 
 #endif   /* BRIN_REVMAP_H */
index 33ceb34ea53873a49cba9f079889fcc02f8581fe..89ed334a018bcba81041c6709980eeef20f0ff57 100644 (file)
@@ -33,7 +33,7 @@
 #define XLOG_BRIN_UPDATE                       0x20
 #define XLOG_BRIN_SAMEPAGE_UPDATE      0x30
 #define XLOG_BRIN_REVMAP_EXTEND                0x40
-#define XLOG_BRIN_REVMAP_VACUUM                0x50
+#define XLOG_BRIN_DESUMMARIZE          0x50
 
 #define XLOG_BRIN_OPMASK                       0x70
 /*
@@ -124,6 +124,24 @@ typedef struct xl_brin_revmap_extend
 #define SizeOfBrinRevmapExtend (offsetof(xl_brin_revmap_extend, targetBlk) + \
                                                                 sizeof(BlockNumber))
 
+/*
+ * This is what we need to know about a range de-summarization
+ *
+ * Backup block 0: revmap page
+ * Backup block 1: regular page
+ */
+typedef struct xl_brin_desummarize
+{
+       /* pages-per-range value used to locate the revmap entry at redo */
+       BlockNumber     pagesPerRange;
+
+       /*
+        * heap block whose revmap entry is set to invalid.  This is a block
+        * number, so it must be a BlockNumber: it is assigned from and passed
+        * on as one, and OffsetNumber is a different, offset-sized type that
+        * would truncate it.
+        */
+       BlockNumber     heapBlk;
+       /* offset of item to delete in regular index page */
+       OffsetNumber regOffset;
+} xl_brin_desummarize;
+
+#define SizeOfBrinDesummarize  (offsetof(xl_brin_desummarize, regOffset) + \
+                                                                sizeof(OffsetNumber))
+
 
 extern void brin_redo(XLogReaderState *record);
 extern void brin_desc(StringInfo buf, XLogReaderState *record);
index fa3dcacd326703d62162cd46870b2a42b71a48cd..1db7a4d715b08bd2baa5861236f11ad0810fb32f 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201704011
+#define CATALOG_VERSION_NO     201704012
 
 #endif
index 1b7ab2a997b87627e84ad29645ced733d6abe7cd..711211d2e6c7a56d8096526aeae788d8fa64234d 100644 (file)
@@ -566,6 +566,8 @@ DATA(insert OID = 3952 (  brin_summarize_new_values PGNSP PGUID 12 1 0 0 0 f f f
 DESCR("brin: standalone scan new table pages");
 DATA(insert OID = 3999 (  brin_summarize_range PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 23 "2205 20" _null_ _null_ _null_ _null_ _null_ brin_summarize_range _null_ _null_ _null_ ));
 DESCR("brin: standalone scan new table pages");
+DATA(insert OID = 4014 (  brin_desummarize_range PGNSP PGUID 12 1 0 0 0 f f f f t f v s 2 0 2278 "2205 20" _null_ _null_ _null_ _null_ _null_ brin_desummarize_range _null_ _null_ _null_ ));
+DESCR("brin: desummarize page range");
 
 DATA(insert OID = 338 (  amvalidate            PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 16 "26" _null_ _null_ _null_ _null_ _null_   amvalidate _null_ _null_ _null_ ));
 DESCR("validate an operator class");
index 3b9c0db833be285ac529369c63ae65a95c8603ae..a40f87aea051824c9f80dc6935ccd16a997083b0 100644 (file)
@@ -392,6 +392,12 @@ INSERT INTO brintest SELECT
        format('%s/%s%s', odd, even, tenthous)::pg_lsn,
        box(point(odd, even), point(thousand, twothousand))
 FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5;
+SELECT brin_desummarize_range('brinidx', 0);
+ brin_desummarize_range 
+------------------------
+(1 row)
+
 VACUUM brintest;  -- force a summarization cycle in brinidx
 UPDATE brintest SET int8col = int8col * int4col;
 UPDATE brintest SET textcol = '' WHERE textcol IS NOT NULL;
@@ -406,6 +412,27 @@ SELECT brin_summarize_new_values('brinidx'); -- ok, no change expected
                          0
 (1 row)
 
+-- Tests for brin_desummarize_range
+SELECT brin_desummarize_range('brinidx', -1); -- error, invalid range
+ERROR:  block number out of range: -1
+SELECT brin_desummarize_range('brinidx', 0);
+ brin_desummarize_range 
+------------------------
+(1 row)
+
+SELECT brin_desummarize_range('brinidx', 0);
+ brin_desummarize_range 
+------------------------
+(1 row)
+
+SELECT brin_desummarize_range('brinidx', 100000000);
+ brin_desummarize_range 
+------------------------
+(1 row)
+
 -- Test brin_summarize_range
 CREATE TABLE brin_summarize (
     value int
index da73df365935bebef13964bffda25c1bb01880c4..521b22fe566d3a5802d5a56b26288c6e21029075 100644 (file)
@@ -400,6 +400,7 @@ INSERT INTO brintest SELECT
        box(point(odd, even), point(thousand, twothousand))
 FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5;
 
+SELECT brin_desummarize_range('brinidx', 0);
 VACUUM brintest;  -- force a summarization cycle in brinidx
 
 UPDATE brintest SET int8col = int8col * int4col;
@@ -410,6 +411,12 @@ SELECT brin_summarize_new_values('brintest'); -- error, not an index
 SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index
 SELECT brin_summarize_new_values('brinidx'); -- ok, no change expected
 
+-- Tests for brin_desummarize_range
+SELECT brin_desummarize_range('brinidx', -1); -- error, invalid range
+SELECT brin_desummarize_range('brinidx', 0);
+SELECT brin_desummarize_range('brinidx', 0);
+SELECT brin_desummarize_range('brinidx', 100000000);
+
 -- Test brin_summarize_range
 CREATE TABLE brin_summarize (
     value int