-$PostgreSQL: pgsql/src/backend/access/hash/README,v 1.6 2007/04/19 20:24:04 tgl Exp $
+$PostgreSQL: pgsql/src/backend/access/hash/README,v 1.7 2008/03/15 20:46:31 tgl Exp $
This directory contains an implementation of hash indexing for Postgres. Most
of the core ideas are taken from Margo Seltzer and Ozan Yigit, A New Hashing
former. The difference between the two represents the number of overflow
pages appearing between the bucket page groups of splitpoints N and N+1.
+(Note: the above describes what happens when filling a hash index that
+starts out at its minimum size. In practice, we try to estimate the required
+index size and allocate a suitable number of splitpoints immediately, to
+avoid expensive re-splitting during the initial index build.)
+
When S splitpoints exist altogether, the array entries hashm_spares[0]
through hashm_spares[S] are valid; hashm_spares[S] records the current
total number of overflow pages. New overflow pages are created as needed
pages are a subset of the overflow pages. It turns out in fact that each
bitmap page's first bit represents itself --- this is not an essential
property, but falls out of the fact that we only allocate another bitmap
-page when we really need one. Bit number zero always corresponds to block
-number 3, which is the first bitmap page and is allocated during index
-creation.
+page when we really need one. Bit number zero always corresponds to the
+first bitmap page, which is allocated during index creation just after all
+the initially created buckets.
Lock definitions
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.98 2008/01/01 19:45:46 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.99 2008/03/15 20:46:31 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
#include "access/hash.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
+#include "optimizer/plancat.h"
/* Working state for hashbuild and its callback */
Relation index = (Relation) PG_GETARG_POINTER(1);
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
IndexBuildResult *result;
+ BlockNumber relpages;
double reltuples;
HashBuildState buildstate;
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
- /* initialize the hash index metadata page */
- _hash_metapinit(index);
+ /* estimate the number of rows currently present in the table */
+ estimate_rel_size(heap, NULL, &relpages, &reltuples);
+
+ /* initialize the hash index metadata page and initial buckets */
+ _hash_metapinit(index, reltuples);
/* build the index */
buildstate.indtuples = 0;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.72 2008/01/01 19:45:46 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.73 2008/03/15 20:46:31 tgl Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
/*
* _hash_metapinit() -- Initialize the metadata page of a hash index,
- * the two buckets that we begin with and the initial
- * bitmap page.
+ * the initial buckets, and the initial bitmap page.
+ *
+ * The initial number of buckets is dependent on num_tuples, an estimate
+ * of the number of tuples to be loaded into the index initially.
*
* We are fairly cavalier about locking here, since we know that no one else
* could be accessing this index. In particular the rule about not holding
* multiple buffer locks is ignored.
*/
void
-_hash_metapinit(Relation rel)
+_hash_metapinit(Relation rel, double num_tuples)
{
HashMetaPage metap;
HashPageOpaque pageopaque;
int32 data_width;
int32 item_width;
int32 ffactor;
- uint16 i;
+ double dnumbuckets;
+ uint32 num_buckets;
+ uint32 log2_num_buckets;
+ uint32 i;
/* safety check */
if (RelationGetNumberOfBlocks(rel) != 0)
ffactor = 10;
/*
- * We initialize the metapage, the first two bucket pages, and the first
+ * Choose the number of initial bucket pages to match the fill factor
+ * given the estimated number of tuples. We round up the result to the
+ * next power of 2, however, and always force at least 2 bucket pages.
+ * The upper limit is determined by considerations explained in
+ * _hash_expandtable().
+ */
+ dnumbuckets = num_tuples / ffactor;
+ if (dnumbuckets <= 2.0)
+ num_buckets = 2;
+ else if (dnumbuckets >= (double) 0x40000000)
+ num_buckets = 0x40000000;
+ else
+ num_buckets = ((uint32) 1) << _hash_log2((uint32) dnumbuckets);
+
+ log2_num_buckets = _hash_log2(num_buckets);
+ Assert(num_buckets == (((uint32) 1) << log2_num_buckets));
+ Assert(log2_num_buckets < HASH_MAX_SPLITPOINTS);
+
+ /*
+ * We initialize the metapage, the first N bucket pages, and the first
* bitmap page in sequence, using _hash_getnewbuf to cause smgrextend()
* calls to occur. This ensures that the smgr level has the right idea of
* the physical index length.
metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);
/*
- * We initialize the index with two buckets, 0 and 1, occupying physical
- * blocks 1 and 2. The first freespace bitmap page is in block 3.
+ * We initialize the index with N buckets, 0 .. N-1, occupying physical
+ * blocks 1 to N. The first freespace bitmap page is in block N+1.
+ * Since N is a power of 2, we can set the masks this way:
*/
- metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */
- metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */
+ metap->hashm_maxbucket = metap->hashm_lowmask = num_buckets - 1;
+ metap->hashm_highmask = (num_buckets << 1) - 1;
MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
- metap->hashm_spares[1] = 1; /* the first bitmap page is only spare */
- metap->hashm_ovflpoint = 1;
+ /* Set up mapping for one spare page after the initial splitpoints */
+ metap->hashm_spares[log2_num_buckets] = 1;
+ metap->hashm_ovflpoint = log2_num_buckets;
metap->hashm_firstfree = 0;
/*
- * Initialize the first two buckets
+ * Initialize the first N buckets
*/
- for (i = 0; i <= 1; i++)
+ for (i = 0; i < num_buckets; i++)
{
buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i));
pg = BufferGetPage(buf);
/*
* Initialize first bitmap page
*/
- _hash_initbitmap(rel, metap, 3);
+ _hash_initbitmap(rel, metap, num_buckets + 1);
/* all done */
_hash_wrtbuf(rel, metabuf);
* index with 2^32 buckets would certainly overflow BlockNumber and hence
* _hash_alloc_buckets() would fail, but if we supported buckets smaller
* than a disk block then this would be an independent constraint.
+ *
+ * If you change this, see also the maximum initial number of buckets
+ * in _hash_metapinit().
*/
if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
goto fail;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.140 2008/01/12 00:11:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/plancat.c,v 1.141 2008/03/15 20:46:31 tgl Exp $
*
*-------------------------------------------------------------------------
*/
get_relation_info_hook_type get_relation_info_hook = NULL;
-static void estimate_rel_size(Relation rel, int32 *attr_widths,
- BlockNumber *pages, double *tuples);
static List *get_relation_constraints(Oid relationObjectId, RelOptInfo *rel,
bool include_notnull);
* relation's attr_width[] cache; we fill this in if we have need to compute
* the attribute widths for estimation purposes.
*/
-static void
+void
estimate_rel_size(Relation rel, int32 *attr_widths,
BlockNumber *pages, double *tuples)
{
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.84 2008/01/01 19:45:56 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.85 2008/03/15 20:46:31 tgl Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
extern void _hash_wrtbuf(Relation rel, Buffer buf);
extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
int to_access);
-extern void _hash_metapinit(Relation rel);
+extern void _hash_metapinit(Relation rel, double num_tuples);
extern void _hash_pageinit(Page page, Size size);
extern void _hash_expandtable(Relation rel, Buffer metabuf);
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.47 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/plancat.h,v 1.48 2008/03/15 20:46:31 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#define PLANCAT_H
#include "nodes/relation.h"
+#include "utils/rel.h"
/* Hook for plugins to get control in get_relation_info() */
typedef void (*get_relation_info_hook_type) (PlannerInfo *root,
extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
bool inhparent, RelOptInfo *rel);
+extern void estimate_rel_size(Relation rel, int32 *attr_widths,
+ BlockNumber *pages, double *tuples);
+
extern bool relation_excluded_by_constraints(RelOptInfo *rel,
RangeTblEntry *rte);