/*-------------------------------------------------------------------------
 *
 * hashinsert.c
 *    Item insertion in hash tables for Postgres.
 *
 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/hash/hashinsert.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/hash.h"
#include "utils/rel.h"

/*
 *  _hash_doinsert() -- Handle insertion of a single index tuple.
 *
 *      This routine is called by the public interface routines, hashbuild
 *      and hashinsert.  By the time it is called, itup has been completely
 *      filled in.
 */
void
_hash_doinsert(Relation rel, IndexTuple itup)
{
    Buffer      buf;
    Buffer      metabuf;
    HashMetaPage metap;
    BlockNumber blkno;
    BlockNumber oldblkno = InvalidBlockNumber;
    bool        retry = false;
    Page        page;
    HashPageOpaque pageopaque;
    Size        itemsz;
    bool        do_expand;
    uint32      hashkey;
    Bucket      bucket;

    /*
     * Get the hash key for the item (it's stored in the index tuple itself).
     */
    hashkey = _hash_get_indextuple_hashkey(itup);

    /* compute item size too */
    itemsz = IndexTupleDSize(*itup);
    itemsz = MAXALIGN(itemsz);  /* be safe, PageAddItem will do this but we
                                 * need to be consistent */

    /* Read the metapage */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
    metap = HashPageGetMeta(BufferGetPage(metabuf));
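
    /*
     * We keep our pin on the metapage for the rest of this function, so
     * that later steps can cheaply re-lock it to recheck the bucket
     * mapping and to bump the tuple count.
     */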

    /*
     * Check whether the item can fit on a hash page at all. (Eventually, we
     * ought to try to apply TOAST methods if not.)  Note that at this point,
     * itemsz doesn't include the ItemId.
     *
     * XXX this is useless code if we are only storing hash keys.
     */
    if (itemsz > HashMaxItemSize((Page) metap))
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("index row size %zu exceeds hash maximum %zu",
                        itemsz, HashMaxItemSize((Page) metap)),
                 errhint("Values larger than a buffer page cannot be indexed.")));

    /*
     * Loop until we get a lock on the correct target bucket.
     */
    for (;;)
    {
        /*
         * Compute the target bucket number, and convert to block number.
         */
        bucket = _hash_hashkey2bucket(hashkey,
                                      metap->hashm_maxbucket,
                                      metap->hashm_highmask,
                                      metap->hashm_lowmask);

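        /*
         * BUCKET_TO_BLKNO maps the bucket number to the block number of
         * the bucket's primary page: one block for the metapage, plus the
         * bucket and overflow pages allocated before this bucket's
         * split-point group (accounted for by hashm_spares[]).  See hash.h
         * for the authoritative definition.
         */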
        blkno = BUCKET_TO_BLKNO(metap, bucket);

        /* Release metapage lock, but keep pin. */
        _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);

        /*
         * If the previous iteration of this loop locked what is still the
         * correct target bucket, we are done.  Otherwise, drop any old lock
         * and lock what now appears to be the correct bucket.
         */
        if (retry)
        {
            if (oldblkno == blkno)
                break;
            _hash_droplock(rel, oldblkno, HASH_SHARE);
        }
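
        /*
         * Note that this is a heavyweight lmgr lock on the bucket's
         * primary block number, not a buffer content lock.  Holding it in
         * share mode keeps a concurrent bucket split, which takes the same
         * lock in exclusive mode, from moving tuples around underneath us.
         */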
        _hash_getlock(rel, blkno, HASH_SHARE);

        /*
         * Reacquire metapage lock and check that no bucket split has taken
         * place while we were awaiting the bucket lock.
         */
        _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_READ);
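
        /*
         * The recheck happens implicitly at the top of the next loop
         * iteration, where the target bucket is recomputed from the
         * possibly-updated hashm_maxbucket and mask fields.
         */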
        oldblkno = blkno;
        retry = true;
    }

    /* Fetch the primary bucket page for the bucket */
    buf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BUCKET_PAGE);
    page = BufferGetPage(buf);
    pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
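
    /*
     * Since we hold the bucket's heavyweight lock, no split can have
     * relocated the bucket since we computed blkno, so its primary page
     * must still carry the expected bucket number.
     */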
    Assert(pageopaque->hasho_bucket == bucket);

    /* Do the insertion */
    while (PageGetFreeSpace(page) < itemsz)
    {
        /*
         * no space on this page; check for an overflow page
         */
        BlockNumber nextblkno = pageopaque->hasho_nextblkno;

        if (BlockNumberIsValid(nextblkno))
        {
            /*
             * An overflow page exists; go get it.  If it doesn't have
             * room, we'll find out on the next pass through the loop test
             * above.
             */
            _hash_relbuf(rel, buf);
            buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
            page = BufferGetPage(buf);
        }
        else
        {
            /*
             * We're at the end of the bucket chain and we haven't found a
             * page with enough room.  Allocate a new overflow page.
             */

            /*
             * Release our write lock without marking the buffer dirty:
             * passing HASH_READ (rather than HASH_WRITE) as the old access
             * level tells _hash_chgbufaccess that we did not modify the
             * page.
             */
            _hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK);

            /* chain to a new overflow page */
            buf = _hash_addovflpage(rel, metabuf, buf);
            page = BufferGetPage(buf);
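
            /*
             * _hash_addovflpage (see hashovfl.c) released our pin on the
             * old tail page and returned the new, empty overflow page
             * pinned and write-locked.
             */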

            /* should fit now, given test above */
            Assert(PageGetFreeSpace(page) >= itemsz);
        }
        pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
        Assert(pageopaque->hasho_flag == LH_OVERFLOW_PAGE);
        Assert(pageopaque->hasho_bucket == bucket);
    }

    /* found page with enough space, so add the item here */
    (void) _hash_pgaddtup(rel, buf, itemsz, itup);

    /* write and release the modified page */
    _hash_wrtbuf(rel, buf);

    /* We can drop the bucket lock now */
    _hash_droplock(rel, blkno, HASH_SHARE);

    /*
     * Write-lock the metapage so we can increment the tuple count. After
     * incrementing it, check to see if it's time for a split.
     */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    metap->hashm_ntuples += 1;

    /* Make sure this stays in sync with _hash_expandtable() */
    do_expand = metap->hashm_ntuples >
        (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
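
    /*
     * For example, with hashm_ffactor = 75 and hashm_maxbucket = 3 (four
     * buckets), the threshold is 300 tuples, so inserting the 301st tuple
     * triggers a split attempt.  (Illustrative numbers only; the actual
     * fill factor is fixed when the index is created.)
     */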

    /* Write out the metapage and drop lock, but keep pin */
    _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);

    /*
     * Attempt to split if a split is needed.  _hash_expandtable does this
     * on a best-effort basis: it gives up silently if it can't obtain the
     * locks it needs.
     */
    if (do_expand)
        _hash_expandtable(rel, metabuf);

    /* Finally drop our pin on the metapage */
    _hash_dropbuf(rel, metabuf);
}

/*
 *  _hash_pgaddtup() -- add a tuple to a particular page in the index.
 *
 * This routine adds the tuple to the page as requested; it does not write out
 * the page.  It is an error to call pgaddtup() without a pin and write lock
 * on the target buffer.
 *
 * Returns the offset number at which the tuple was inserted.  This function
 * is responsible for preserving the condition that tuples in a hash index
 * page are sorted by hashkey value.
 */
OffsetNumber
_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
{
    OffsetNumber itup_off;
    Page        page;
    uint32      hashkey;

    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);

    /* Find where to insert the tuple (preserving page's hashkey ordering) */
    hashkey = _hash_get_indextuple_hashkey(itup);
    itup_off = _hash_binsearch(page, hashkey);

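    /*
     * _hash_binsearch returned the offset of the first entry whose hash
     * key is >= ours (or one past the last entry if ours is the largest),
     * so inserting at that offset preserves the ascending hashkey order.
     *
     * Both boolean arguments to PageAddItem ("overwrite" and "is_heap")
     * are false: we are inserting a fresh index entry at itup_off,
     * shifting any later entries up by one position.
     */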
    if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
        == InvalidOffsetNumber)
        elog(ERROR, "failed to add index item to \"%s\"",
             RelationGetRelationName(rel));

    return itup_off;
}