granicus.if.org Git - postgresql/blob - src/backend/executor/execGrouping.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * execGrouping.c
   4  *        executor utility routines for grouping, hashing, and aggregation
   5  *
   6  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/executor/execGrouping.c,v 1.22 2007/01/05 22:19:27 momjian Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include "executor/executor.h"
  18 #include "parser/parse_oper.h"
  19 #include "utils/lsyscache.h"
  20 #include "utils/memutils.h"
  21 #include "utils/syscache.h"
  22
  23
  24 static TupleHashTable CurTupleHashTable = NULL;
  25
  26 static uint32 TupleHashTableHash(const void *key, Size keysize);
  27 static int TupleHashTableMatch(const void *key1, const void *key2,
  28                                         Size keysize);
  29
  30
  31 /*****************************************************************************
  32  *              Utility routines for grouping tuples together
  33  *****************************************************************************/
  34
  35 /*
  36  * execTuplesMatch
  37  *              Return true if two tuples match in all the indicated fields.
  38  *
  39  * This actually implements SQL's notion of "not distinct".  Two nulls
  40  * match, a null and a not-null don't match.
  41  *
  42  * slot1, slot2: the tuples to compare (must have same columns!)
  43  * numCols: the number of attributes to be examined
  44  * matchColIdx: array of attribute column numbers
  45  * eqFunctions: array of fmgr lookup info for the equality functions to use
  46  * evalContext: short-term memory context for executing the functions
  47  *
  48  * NB: evalContext is reset each time!
  49  */
  50 bool
  51 execTuplesMatch(TupleTableSlot *slot1,
  52                                 TupleTableSlot *slot2,
  53                                 int numCols,
  54                                 AttrNumber *matchColIdx,
  55                                 FmgrInfo *eqfunctions,
  56                                 MemoryContext evalContext)
  57 {
  58         MemoryContext oldContext;
  59         bool            result;
  60         int                     i;
  61
  62         /* Reset and switch into the temp context. */
  63         MemoryContextReset(evalContext);
  64         oldContext = MemoryContextSwitchTo(evalContext);
  65
  66         /*
  67          * We cannot report a match without checking all the fields, but we can
  68          * report a non-match as soon as we find unequal fields.  So, start
  69          * comparing at the last field (least significant sort key). That's the
  70          * most likely to be different if we are dealing with sorted input.
  71          */
  72         result = true;
  73
  74         for (i = numCols; --i >= 0;)
  75         {
  76                 AttrNumber      att = matchColIdx[i];
  77                 Datum           attr1,
  78                                         attr2;
  79                 bool            isNull1,
  80                                         isNull2;
  81
  82                 attr1 = slot_getattr(slot1, att, &isNull1);
  83
  84                 attr2 = slot_getattr(slot2, att, &isNull2);
  85
  86                 if (isNull1 != isNull2)
  87                 {
  88                         result = false;         /* one null and one not; they aren't equal */
  89                         break;
  90                 }
  91
  92                 if (isNull1)
  93                         continue;                       /* both are null, treat as equal */
  94
  95                 /* Apply the type-specific equality function */
  96
  97                 if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
  98                                                                                 attr1, attr2)))
  99                 {
 100                         result = false;         /* they aren't equal */
 101                         break;
 102                 }
 103         }
 104
 105         MemoryContextSwitchTo(oldContext);
 106
 107         return result;
 108 }
 109
 110 /*
 111  * execTuplesUnequal
 112  *              Return true if two tuples are definitely unequal in the indicated
 113  *              fields.
 114  *
 115  * Nulls are neither equal nor unequal to anything else.  A true result
 116  * is obtained only if there are non-null fields that compare not-equal.
 117  *
 118  * Parameters are identical to execTuplesMatch.
 119  */
 120 bool
 121 execTuplesUnequal(TupleTableSlot *slot1,
 122                                   TupleTableSlot *slot2,
 123                                   int numCols,
 124                                   AttrNumber *matchColIdx,
 125                                   FmgrInfo *eqfunctions,
 126                                   MemoryContext evalContext)
 127 {
 128         MemoryContext oldContext;
 129         bool            result;
 130         int                     i;
 131
 132         /* Reset and switch into the temp context. */
 133         MemoryContextReset(evalContext);
 134         oldContext = MemoryContextSwitchTo(evalContext);
 135
 136         /*
 137          * We cannot report a match without checking all the fields, but we can
 138          * report a non-match as soon as we find unequal fields.  So, start
 139          * comparing at the last field (least significant sort key). That's the
 140          * most likely to be different if we are dealing with sorted input.
 141          */
 142         result = false;
 143
 144         for (i = numCols; --i >= 0;)
 145         {
 146                 AttrNumber      att = matchColIdx[i];
 147                 Datum           attr1,
 148                                         attr2;
 149                 bool            isNull1,
 150                                         isNull2;
 151
 152                 attr1 = slot_getattr(slot1, att, &isNull1);
 153
 154                 if (isNull1)
 155                         continue;                       /* can't prove anything here */
 156
 157                 attr2 = slot_getattr(slot2, att, &isNull2);
 158
 159                 if (isNull2)
 160                         continue;                       /* can't prove anything here */
 161
 162                 /* Apply the type-specific equality function */
 163
 164                 if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
 165                                                                                 attr1, attr2)))
 166                 {
 167                         result = true;          /* they are unequal */
 168                         break;
 169                 }
 170         }
 171
 172         MemoryContextSwitchTo(oldContext);
 173
 174         return result;
 175 }
 176
 177
 178 /*
 179  * execTuplesMatchPrepare
 180  *              Look up the equality functions needed for execTuplesMatch or
 181  *              execTuplesUnequal.
 182  *
 183  * The result is a palloc'd array.
 184  */
 185 FmgrInfo *
 186 execTuplesMatchPrepare(TupleDesc tupdesc,
 187                                            int numCols,
 188                                            AttrNumber *matchColIdx)
 189 {
 190         FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
 191         int                     i;
 192
 193         for (i = 0; i < numCols; i++)
 194         {
 195                 AttrNumber      att = matchColIdx[i];
 196                 Oid                     typid = tupdesc->attrs[att - 1]->atttypid;
 197                 Oid                     eq_function;
 198
 199                 eq_function = equality_oper_funcid(typid);
 200                 fmgr_info(eq_function, &eqfunctions[i]);
 201         }
 202
 203         return eqfunctions;
 204 }
 205
 206 /*
 207  * execTuplesHashPrepare
 208  *              Look up the equality and hashing functions needed for a TupleHashTable.
 209  *
 210  * This is similar to execTuplesMatchPrepare, but we also need to find the
 211  * hash functions associated with the equality operators.  *eqfunctions and
 212  * *hashfunctions receive the palloc'd result arrays.
 213  */
 214 void
 215 execTuplesHashPrepare(TupleDesc tupdesc,
 216                                           int numCols,
 217                                           AttrNumber *matchColIdx,
 218                                           FmgrInfo **eqfunctions,
 219                                           FmgrInfo **hashfunctions)
 220 {
 221         int                     i;
 222
 223         *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
 224         *hashfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
 225
 226         for (i = 0; i < numCols; i++)
 227         {
 228                 AttrNumber      att = matchColIdx[i];
 229                 Oid                     typid = tupdesc->attrs[att - 1]->atttypid;
 230                 Operator        optup;
 231                 Oid                     eq_opr;
 232                 Oid                     eq_function;
 233                 Oid                     hash_function;
 234
 235                 optup = equality_oper(typid, false);
 236                 eq_opr = oprid(optup);
 237                 eq_function = oprfuncid(optup);
 238                 ReleaseSysCache(optup);
 239                 hash_function = get_op_hash_function(eq_opr);
 240                 if (!OidIsValid(hash_function)) /* should not happen */
 241                         elog(ERROR, "could not find hash function for hash operator %u",
 242                                  eq_opr);
 243                 fmgr_info(eq_function, &(*eqfunctions)[i]);
 244                 fmgr_info(hash_function, &(*hashfunctions)[i]);
 245         }
 246 }
 247
 248
 249 /*****************************************************************************
 250  *              Utility routines for all-in-memory hash tables
 251  *
 252  * These routines build hash tables for grouping tuples together (eg, for
 253  * hash aggregation).  There is one entry for each not-distinct set of tuples
 254  * presented.
 255  *****************************************************************************/
 256
 257 /*
 258  * Construct an empty TupleHashTable
 259  *
 260  *      numCols, keyColIdx: identify the tuple fields to use as lookup key
 261  *      eqfunctions: equality comparison functions to use
 262  *      hashfunctions: datatype-specific hashing functions to use
 263  *      nbuckets: initial estimate of hashtable size
 264  *      entrysize: size of each entry (at least sizeof(TupleHashEntryData))
 265  *      tablecxt: memory context in which to store table and table entries
 266  *      tempcxt: short-lived context for evaluation hash and comparison functions
 267  *
 268  * The function arrays may be made with execTuplesHashPrepare().
 269  *
 270  * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
 271  * storage that will live as long as the hashtable does.
 272  */
 273 TupleHashTable
 274 BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 275                                         FmgrInfo *eqfunctions,
 276                                         FmgrInfo *hashfunctions,
 277                                         int nbuckets, Size entrysize,
 278                                         MemoryContext tablecxt, MemoryContext tempcxt)
 279 {
 280         TupleHashTable hashtable;
 281         HASHCTL         hash_ctl;
 282
 283         Assert(nbuckets > 0);
 284         Assert(entrysize >= sizeof(TupleHashEntryData));
 285
 286         hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt,
 287                                                                                                  sizeof(TupleHashTableData));
 288
 289         hashtable->numCols = numCols;
 290         hashtable->keyColIdx = keyColIdx;
 291         hashtable->eqfunctions = eqfunctions;
 292         hashtable->hashfunctions = hashfunctions;
 293         hashtable->tablecxt = tablecxt;
 294         hashtable->tempcxt = tempcxt;
 295         hashtable->entrysize = entrysize;
 296         hashtable->tableslot = NULL;    /* will be made on first lookup */
 297         hashtable->inputslot = NULL;
 298
 299         MemSet(&hash_ctl, 0, sizeof(hash_ctl));
 300         hash_ctl.keysize = sizeof(TupleHashEntryData);
 301         hash_ctl.entrysize = entrysize;
 302         hash_ctl.hash = TupleHashTableHash;
 303         hash_ctl.match = TupleHashTableMatch;
 304         hash_ctl.hcxt = tablecxt;
 305         hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets,
 306                                                                          &hash_ctl,
 307                                         HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
 308
 309         return hashtable;
 310 }
 311
 312 /*
 313  * Find or create a hashtable entry for the tuple group containing the
 314  * given tuple.
 315  *
 316  * If isnew is NULL, we do not create new entries; we return NULL if no
 317  * match is found.
 318  *
 319  * If isnew isn't NULL, then a new entry is created if no existing entry
 320  * matches.  On return, *isnew is true if the entry is newly created,
 321  * false if it existed already.  Any extra space in a new entry has been
 322  * zeroed.
 323  */
 324 TupleHashEntry
 325 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 326                                          bool *isnew)
 327 {
 328         TupleHashEntry entry;
 329         MemoryContext oldContext;
 330         TupleHashTable saveCurHT;
 331         TupleHashEntryData dummy;
 332         bool            found;
 333
 334         /* If first time through, clone the input slot to make table slot */
 335         if (hashtable->tableslot == NULL)
 336         {
 337                 TupleDesc       tupdesc;
 338
 339                 oldContext = MemoryContextSwitchTo(hashtable->tablecxt);
 340
 341                 /*
 342                  * We copy the input tuple descriptor just for safety --- we assume
 343                  * all input tuples will have equivalent descriptors.
 344                  */
 345                 tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
 346                 hashtable->tableslot = MakeSingleTupleTableSlot(tupdesc);
 347                 MemoryContextSwitchTo(oldContext);
 348         }
 349
 350         /* Need to run the hash functions in short-lived context */
 351         oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
 352
 353         /*
 354          * Set up data needed by hash and match functions
 355          *
 356          * We save and restore CurTupleHashTable just in case someone manages to
 357          * invoke this code re-entrantly.
 358          */
 359         hashtable->inputslot = slot;
 360         saveCurHT = CurTupleHashTable;
 361         CurTupleHashTable = hashtable;
 362
 363         /* Search the hash table */
 364         dummy.firstTuple = NULL;        /* flag to reference inputslot */
 365         entry = (TupleHashEntry) hash_search(hashtable->hashtab,
 366                                                                                  &dummy,
 367                                                                                  isnew ? HASH_ENTER : HASH_FIND,
 368                                                                                  &found);
 369
 370         if (isnew)
 371         {
 372                 if (found)
 373                 {
 374                         /* found pre-existing entry */
 375                         *isnew = false;
 376                 }
 377                 else
 378                 {
 379                         /*
 380                          * created new entry
 381                          *
 382                          * Zero any caller-requested space in the entry.  (This zaps the
 383                          * "key data" dynahash.c copied into the new entry, but we don't
 384                          * care since we're about to overwrite it anyway.)
 385                          */
 386                         MemSet(entry, 0, hashtable->entrysize);
 387
 388                         /* Copy the first tuple into the table context */
 389                         MemoryContextSwitchTo(hashtable->tablecxt);
 390                         entry->firstTuple = ExecCopySlotMinimalTuple(slot);
 391
 392                         *isnew = true;
 393                 }
 394         }
 395
 396         CurTupleHashTable = saveCurHT;
 397
 398         MemoryContextSwitchTo(oldContext);
 399
 400         return entry;
 401 }
 402
 403 /*
 404  * Compute the hash value for a tuple
 405  *
 406  * The passed-in key is a pointer to TupleHashEntryData.  In an actual hash
 407  * table entry, the firstTuple field points to a tuple (in MinimalTuple
 408  * format).  LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
 409  * NULL firstTuple field --- that cues us to look at the inputslot instead.
 410  * This convention avoids the need to materialize virtual input tuples unless
 411  * they actually need to get copied into the table.
 412  *
 413  * CurTupleHashTable must be set before calling this, since dynahash.c
 414  * doesn't provide any API that would let us get at the hashtable otherwise.
 415  *
 416  * Also, the caller must select an appropriate memory context for running
 417  * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
 418  */
 419 static uint32
 420 TupleHashTableHash(const void *key, Size keysize)
 421 {
 422         MinimalTuple tuple = ((const TupleHashEntryData *) key)->firstTuple;
 423         TupleTableSlot *slot;
 424         TupleHashTable hashtable = CurTupleHashTable;
 425         int                     numCols = hashtable->numCols;
 426         AttrNumber *keyColIdx = hashtable->keyColIdx;
 427         uint32          hashkey = 0;
 428         int                     i;
 429
 430         if (tuple == NULL)
 431         {
 432                 /* Process the current input tuple for the table */
 433                 slot = hashtable->inputslot;
 434         }
 435         else
 436         {
 437                 /* Process a tuple already stored in the table */
 438                 /* (this case never actually occurs in current dynahash.c code) */
 439                 slot = hashtable->tableslot;
 440                 ExecStoreMinimalTuple(tuple, slot, false);
 441         }
 442
 443         for (i = 0; i < numCols; i++)
 444         {
 445                 AttrNumber      att = keyColIdx[i];
 446                 Datum           attr;
 447                 bool            isNull;
 448
 449                 /* rotate hashkey left 1 bit at each step */
 450                 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
 451
 452                 attr = slot_getattr(slot, att, &isNull);
 453
 454                 if (!isNull)                    /* treat nulls as having hash key 0 */
 455                 {
 456                         uint32          hkey;
 457
 458                         hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
 459                                                                                                 attr));
 460                         hashkey ^= hkey;
 461                 }
 462         }
 463
 464         return hashkey;
 465 }
 466
 467 /*
 468  * See whether two tuples (presumably of the same hash value) match
 469  *
 470  * As above, the passed pointers are pointers to TupleHashEntryData.
 471  *
 472  * CurTupleHashTable must be set before calling this, since dynahash.c
 473  * doesn't provide any API that would let us get at the hashtable otherwise.
 474  *
 475  * Also, the caller must select an appropriate memory context for running
 476  * the compare functions.  (dynahash.c doesn't change CurrentMemoryContext.)
 477  */
 478 static int
 479 TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
 480 {
 481         MinimalTuple tuple1 = ((const TupleHashEntryData *) key1)->firstTuple;
 482
 483 #ifdef USE_ASSERT_CHECKING
 484         MinimalTuple tuple2 = ((const TupleHashEntryData *) key2)->firstTuple;
 485 #endif
 486         TupleTableSlot *slot1;
 487         TupleTableSlot *slot2;
 488         TupleHashTable hashtable = CurTupleHashTable;
 489
 490         /*
 491          * We assume that dynahash.c will only ever call us with the first
 492          * argument being an actual table entry, and the second argument being
 493          * LookupTupleHashEntry's dummy TupleHashEntryData.  The other direction
 494          * could be supported too, but is not currently used by dynahash.c.
 495          */
 496         Assert(tuple1 != NULL);
 497         slot1 = hashtable->tableslot;
 498         ExecStoreMinimalTuple(tuple1, slot1, false);
 499         Assert(tuple2 == NULL);
 500         slot2 = hashtable->inputslot;
 501
 502         if (execTuplesMatch(slot1,
 503                                                 slot2,
 504                                                 hashtable->numCols,
 505                                                 hashtable->keyColIdx,
 506                                                 hashtable->eqfunctions,
 507                                                 hashtable->tempcxt))
 508                 return 0;
 509         else
 510                 return 1;
 511 }