granicus.if.org Git - postgresql/blob - src/backend/access/hash/hashinsert.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * hashinsert.c
   4  *        Item insertion in hash tables for Postgres.
   5  *
   6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        src/backend/access/hash/hashinsert.c
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include "access/hash.h"
  19 #include "utils/rel.h"
  20
  21
  22 /*
  23  *      _hash_doinsert() -- Handle insertion of a single index tuple.
  24  *
  25  *              This routine is called by the public interface routines, hashbuild
  26  *              and hashinsert.  By here, itup is completely filled in.
  27  */
  28 void
  29 _hash_doinsert(Relation rel, IndexTuple itup)
  30 {
  31         Buffer          buf = InvalidBuffer;
  32         Buffer          bucket_buf;
  33         Buffer          metabuf;
  34         HashMetaPage metap;
  35         HashMetaPage usedmetap = NULL;
  36         Page            metapage;
  37         Page            page;
  38         HashPageOpaque pageopaque;
  39         Size            itemsz;
  40         bool            do_expand;
  41         uint32          hashkey;
  42         Bucket          bucket;
  43
  44         /*
  45          * Get the hash key for the item (it's stored in the index tuple itself).
  46          */
  47         hashkey = _hash_get_indextuple_hashkey(itup);
  48
  49         /* compute item size too */
  50         itemsz = IndexTupleDSize(*itup);
  51         itemsz = MAXALIGN(itemsz);      /* be safe, PageAddItem will do this but we
  52                                                                  * need to be consistent */
  53
  54 restart_insert:
  55
  56         /*
  57          * Read the metapage.  We don't lock it yet; HashMaxItemSize() will
  58          * examine pd_pagesize_version, but that can't change so we can examine
  59          * it without a lock.
  60          */
  61         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
  62         metapage = BufferGetPage(metabuf);
  63
  64         /*
  65          * Check whether the item can fit on a hash page at all. (Eventually, we
  66          * ought to try to apply TOAST methods if not.)  Note that at this point,
  67          * itemsz doesn't include the ItemId.
  68          *
  69          * XXX this is useless code if we are only storing hash keys.
  70          */
  71         if (itemsz > HashMaxItemSize(metapage))
  72                 ereport(ERROR,
  73                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
  74                                  errmsg("index row size %zu exceeds hash maximum %zu",
  75                                                 itemsz, HashMaxItemSize(metapage)),
  76                         errhint("Values larger than a buffer page cannot be indexed.")));
  77
  78         /* Lock the primary bucket page for the target bucket. */
  79         buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
  80                                                                                   &usedmetap);
  81         Assert(usedmetap != NULL);
  82
  83         /* remember the primary bucket buffer to release the pin on it at end. */
  84         bucket_buf = buf;
  85
  86         page = BufferGetPage(buf);
  87         pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
  88         bucket = pageopaque->hasho_bucket;
  89
  90         /*
  91          * If this bucket is in the process of being split, try to finish the
  92          * split before inserting, because that might create room for the
  93          * insertion to proceed without allocating an additional overflow page.
  94          * It's only interesting to finish the split if we're trying to insert
  95          * into the bucket from which we're removing tuples (the "old" bucket),
  96          * not if we're trying to insert into the bucket into which tuples are
  97          * being moved (the "new" bucket).
  98          */
  99         if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
 100         {
 101                 /* release the lock on bucket buffer, before completing the split. */
 102                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 103
 104                 _hash_finish_split(rel, metabuf, buf, bucket,
 105                                                    usedmetap->hashm_maxbucket,
 106                                                    usedmetap->hashm_highmask,
 107                                                    usedmetap->hashm_lowmask);
 108
 109                 /* release the pin on old and meta buffer.  retry for insert. */
 110                 _hash_dropbuf(rel, buf);
 111                 _hash_dropbuf(rel, metabuf);
 112                 goto restart_insert;
 113         }
 114
 115         /* Do the insertion */
 116         while (PageGetFreeSpace(page) < itemsz)
 117         {
 118                 /*
 119                  * no space on this page; check for an overflow page
 120                  */
 121                 BlockNumber nextblkno = pageopaque->hasho_nextblkno;
 122
 123                 if (BlockNumberIsValid(nextblkno))
 124                 {
 125                         /*
 126                          * ovfl page exists; go get it.  if it doesn't have room, we'll
 127                          * find out next pass through the loop test above.  we always
 128                          * release both the lock and pin if this is an overflow page, but
 129                          * only the lock if this is the primary bucket page, since the pin
 130                          * on the primary bucket must be retained throughout the scan.
 131                          */
 132                         if (buf != bucket_buf)
 133                                 _hash_relbuf(rel, buf);
 134                         else
 135                                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 136                         buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
 137                         page = BufferGetPage(buf);
 138                 }
 139                 else
 140                 {
 141                         /*
 142                          * we're at the end of the bucket chain and we haven't found a
 143                          * page with enough room.  allocate a new overflow page.
 144                          */
 145
 146                         /* release our write lock without modifying buffer */
 147                         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 148
 149                         /* chain to a new overflow page */
 150                         buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf) ? true : false);
 151                         page = BufferGetPage(buf);
 152
 153                         /* should fit now, given test above */
 154                         Assert(PageGetFreeSpace(page) >= itemsz);
 155                 }
 156                 pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
 157                 Assert(pageopaque->hasho_flag == LH_OVERFLOW_PAGE);
 158                 Assert(pageopaque->hasho_bucket == bucket);
 159         }
 160
 161         /* found page with enough space, so add the item here */
 162         (void) _hash_pgaddtup(rel, buf, itemsz, itup);
 163
 164         /*
 165          * dirty and release the modified page.  if the page we modified was an
 166          * overflow page, we also need to separately drop the pin we retained on
 167          * the primary bucket page.
 168          */
 169         MarkBufferDirty(buf);
 170         _hash_relbuf(rel, buf);
 171         if (buf != bucket_buf)
 172                 _hash_dropbuf(rel, bucket_buf);
 173
 174         /*
 175          * Write-lock the metapage so we can increment the tuple count. After
 176          * incrementing it, check to see if it's time for a split.
 177          */
 178         LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
 179
 180         metap = HashPageGetMeta(metapage);
 181         metap->hashm_ntuples += 1;
 182
 183         /* Make sure this stays in sync with _hash_expandtable() */
 184         do_expand = metap->hashm_ntuples >
 185                 (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
 186
 187         /* Write out the metapage and drop lock, but keep pin */
 188         MarkBufferDirty(metabuf);
 189         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
 190
 191         /* Attempt to split if a split is needed */
 192         if (do_expand)
 193                 _hash_expandtable(rel, metabuf);
 194
 195         /* Finally drop our pin on the metapage */
 196         _hash_dropbuf(rel, metabuf);
 197 }
 198
 199 /*
 200  *      _hash_pgaddtup() -- add a tuple to a particular page in the index.
 201  *
 202  * This routine adds the tuple to the page as requested; it does not write out
 203  * the page.  It is an error to call pgaddtup() without pin and write lock on
 204  * the target buffer.
 205  *
 206  * Returns the offset number at which the tuple was inserted.  This function
 207  * is responsible for preserving the condition that tuples in a hash index
 208  * page are sorted by hashkey value.
 209  */
 210 OffsetNumber
 211 _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
 212 {
 213         OffsetNumber itup_off;
 214         Page            page;
 215         uint32          hashkey;
 216
 217         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 218         page = BufferGetPage(buf);
 219
 220         /* Find where to insert the tuple (preserving page's hashkey ordering) */
 221         hashkey = _hash_get_indextuple_hashkey(itup);
 222         itup_off = _hash_binsearch(page, hashkey);
 223
 224         if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
 225                 == InvalidOffsetNumber)
 226                 elog(ERROR, "failed to add index item to \"%s\"",
 227                          RelationGetRelationName(rel));
 228
 229         return itup_off;
 230 }
 231
 232 /*
 233  *      _hash_pgaddmultitup() -- add a tuple vector to a particular page in the
 234  *                                                       index.
 235  *
 236  * This routine has same requirements for locking and tuple ordering as
 237  * _hash_pgaddtup().
 238  *
 239  * Returns the offset number array at which the tuples were inserted.
 240  */
 241 void
 242 _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
 243                                         OffsetNumber *itup_offsets, uint16 nitups)
 244 {
 245         OffsetNumber itup_off;
 246         Page            page;
 247         uint32          hashkey;
 248         int                     i;
 249
 250         _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 251         page = BufferGetPage(buf);
 252
 253         for (i = 0; i < nitups; i++)
 254         {
 255                 Size            itemsize;
 256
 257                 itemsize = IndexTupleDSize(*itups[i]);
 258                 itemsize = MAXALIGN(itemsize);
 259
 260                 /* Find where to insert the tuple (preserving page's hashkey ordering) */
 261                 hashkey = _hash_get_indextuple_hashkey(itups[i]);
 262                 itup_off = _hash_binsearch(page, hashkey);
 263
 264                 itup_offsets[i] = itup_off;
 265
 266                 if (PageAddItem(page, (Item) itups[i], itemsize, itup_off, false, false)
 267                         == InvalidOffsetNumber)
 268                         elog(ERROR, "failed to add index item to \"%s\"",
 269                                  RelationGetRelationName(rel));
 270         }
 271 }