/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *        Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *        src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */
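
/*
 * As a rough illustration (the exact plan shape is up to the planner), a
 * query such as
 *        SELECT a FROM t1 INTERSECT ALL SELECT a FROM t2
 * is typically executed with this node sitting atop an Append of the two
 * inputs, each extended with the junk flag column (0 for the left input,
 * 1 for the right).  In SETOP_SORTED mode a Sort on the grouping columns
 * lies between the Append and the SetOp; in SETOP_HASHED mode (displayed
 * as HashSetOp by EXPLAIN) the SetOp consumes the Append's output directly.
 */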

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "miscadmin.h"
#include "utils/memutils.h"


/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
        long            numLeft;                /* number of left-input dups in group */
        long            numRight;               /* number of right-input dups in group */
}                       SetOpStatePerGroupData;


static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);


/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
        pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
        if (flag)
                pergroup->numRight++;
        else
                pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
        SetOp      *node = (SetOp *) setopstate->ps.plan;
        int                     flag;
        bool            isNull;

        flag = DatumGetInt32(slot_getattr(inputslot,
                                                                          node->flagColIdx,
                                                                          &isNull));
        Assert(!isNull);
        Assert(flag == 0 || flag == 1);
        return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
        SetOp      *node = (SetOp *) setopstate->ps.plan;
        ExprContext *econtext = setopstate->ps.ps_ExprContext;
        TupleDesc       desc = ExecGetResultType(outerPlanState(setopstate));

        Assert(node->strategy == SETOP_HASHED);
        Assert(node->numGroups > 0);

        setopstate->hashtable = BuildTupleHashTable(&setopstate->ps,
                                                                                                desc,
                                                                                                node->numCols,
                                                                                                node->dupColIdx,
                                                                                                setopstate->eqfuncoids,
                                                                                                setopstate->hashfunctions,
                                                                                                node->numGroups,
                                                                                                0,
                                                                                                setopstate->tableContext,
                                                                                                econtext->ecxt_per_tuple_memory,
                                                                                                false);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
        SetOp      *plannode = (SetOp *) setopstate->ps.plan;

        switch (plannode->cmd)
        {
                case SETOPCMD_INTERSECT:
                        if (pergroup->numLeft > 0 && pergroup->numRight > 0)
                                setopstate->numOutput = 1;
                        else
                                setopstate->numOutput = 0;
                        break;
                case SETOPCMD_INTERSECT_ALL:
                        setopstate->numOutput =
                                (pergroup->numLeft < pergroup->numRight) ?
                                pergroup->numLeft : pergroup->numRight;
                        break;
                case SETOPCMD_EXCEPT:
                        if (pergroup->numLeft > 0 && pergroup->numRight == 0)
                                setopstate->numOutput = 1;
                        else
                                setopstate->numOutput = 0;
                        break;
                case SETOPCMD_EXCEPT_ALL:
                        setopstate->numOutput =
                                (pergroup->numLeft < pergroup->numRight) ?
                                0 : (pergroup->numLeft - pergroup->numRight);
                        break;
                default:
                        elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
                        break;
        }
}
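
/*
 * Worked example of the rules above: if a group was seen 3 times in the
 * left input and 2 times in the right (numLeft = 3, numRight = 2), then
 * INTERSECT emits 1 copy, INTERSECT ALL emits min(3, 2) = 2 copies,
 * EXCEPT emits 0 copies (the group appears on the right), and EXCEPT ALL
 * emits 3 - 2 = 1 copy.
 */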


/* ----------------------------------------------------------------
 *              ExecSetOp
 * ----------------------------------------------------------------
 */
static TupleTableSlot *                 /* return: a tuple or NULL */
ExecSetOp(PlanState *pstate)
{
        SetOpState *node = castNode(SetOpState, pstate);
        SetOp      *plannode = (SetOp *) node->ps.plan;
        TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

        CHECK_FOR_INTERRUPTS();

        /*
         * If the previously-returned tuple needs to be returned more than once,
         * keep returning it.
         */
        if (node->numOutput > 0)
        {
                node->numOutput--;
                return resultTupleSlot;
        }

        /* Otherwise, we're done if we are out of groups */
        if (node->setop_done)
                return NULL;

        /* Fetch the next tuple group according to the correct strategy */
        if (plannode->strategy == SETOP_HASHED)
        {
                if (!node->table_filled)
                        setop_fill_hash_table(node);
                return setop_retrieve_hash_table(node);
        }
        else
                return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
        PlanState  *outerPlan;
        SetOpStatePerGroup pergroup;
        TupleTableSlot *outerslot;
        TupleTableSlot *resultTupleSlot;
        ExprContext *econtext = setopstate->ps.ps_ExprContext;

        /*
         * get state info from node
         */
        outerPlan = outerPlanState(setopstate);
        pergroup = (SetOpStatePerGroup) setopstate->pergroup;
        resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

        /*
         * We loop retrieving groups until we find one we should return
         */
        while (!setopstate->setop_done)
        {
                /*
                 * If we don't already have the first tuple of the new group, fetch it
                 * from the outer plan.
                 */
                if (setopstate->grp_firstTuple == NULL)
                {
                        outerslot = ExecProcNode(outerPlan);
                        if (!TupIsNull(outerslot))
                        {
                                /* Make a copy of the first input tuple */
                                setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
                        }
                        else
                        {
                                /* outer plan produced no tuples at all */
                                setopstate->setop_done = true;
                                return NULL;
                        }
                }

                /*
                 * Store the copied first input tuple in the tuple table slot reserved
                 * for it.  The tuple will be deleted when it is cleared from the
                 * slot.
                 */
                ExecStoreHeapTuple(setopstate->grp_firstTuple,
                                                   resultTupleSlot,
                                                   true);
                setopstate->grp_firstTuple = NULL;      /* don't keep two pointers */

                /* Initialize working state for a new input tuple group */
                initialize_counts(pergroup);

                /* Count the first input tuple */
                advance_counts(pergroup,
                                           fetch_tuple_flag(setopstate, resultTupleSlot));

                /*
                 * Scan the outer plan until we exhaust it or cross a group boundary.
                 */
                for (;;)
                {
                        outerslot = ExecProcNode(outerPlan);
                        if (TupIsNull(outerslot))
                        {
                                /* no more outer-plan tuples available */
                                setopstate->setop_done = true;
                                break;
                        }

                        /*
                         * Check whether we've crossed a group boundary.
                         */
                        econtext->ecxt_outertuple = resultTupleSlot;
                        econtext->ecxt_innertuple = outerslot;

                        if (!ExecQualAndReset(setopstate->eqfunction, econtext))
                        {
                                /*
                                 * Save the first input tuple of the next group.
                                 */
                                setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
                                break;
                        }

                        /* Still in same group, so count this tuple */
                        advance_counts(pergroup,
                                                   fetch_tuple_flag(setopstate, outerslot));
                }

                /*
                 * Done scanning input tuple group.  See if we should emit any copies
                 * of result tuple, and if so return the first copy.
                 */
                set_output_count(setopstate, pergroup);

                if (setopstate->numOutput > 0)
                {
                        setopstate->numOutput--;
                        return resultTupleSlot;
                }
        }

        /* No more groups */
        ExecClearTuple(resultTupleSlot);
        return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
        SetOp      *node = (SetOp *) setopstate->ps.plan;
        PlanState  *outerPlan;
        int                     firstFlag;
        bool            in_first_rel PG_USED_FOR_ASSERTS_ONLY;
        ExprContext *econtext = setopstate->ps.ps_ExprContext;

        /*
         * get state info from node
         */
        outerPlan = outerPlanState(setopstate);
        firstFlag = node->firstFlag;
        /* verify planner didn't mess up */
        Assert(firstFlag == 0 ||
                   (firstFlag == 1 &&
                        (node->cmd == SETOPCMD_INTERSECT ||
                         node->cmd == SETOPCMD_INTERSECT_ALL)));

        /*
         * Process each outer-plan tuple, and then fetch the next one, until we
         * exhaust the outer plan.
         */
        in_first_rel = true;
        for (;;)
        {
                TupleTableSlot *outerslot;
                int                     flag;
                TupleHashEntryData *entry;
                bool            isnew;

                outerslot = ExecProcNode(outerPlan);
                if (TupIsNull(outerslot))
                        break;

                /* Identify whether it's left or right input */
                flag = fetch_tuple_flag(setopstate, outerslot);

                if (flag == firstFlag)
                {
                        /* (still) in first input relation */
                        Assert(in_first_rel);

                        /* Find or build hashtable entry for this tuple's group */
                        entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
                                                                                 &isnew);

                        /* If new tuple group, initialize counts */
                        if (isnew)
                        {
                                entry->additional = (SetOpStatePerGroup)
                                        MemoryContextAlloc(setopstate->hashtable->tablecxt,
                                                                           sizeof(SetOpStatePerGroupData));
                                initialize_counts((SetOpStatePerGroup) entry->additional);
                        }

                        /* Advance the counts */
                        advance_counts((SetOpStatePerGroup) entry->additional, flag);
                }
                else
                {
                        /* reached second relation */
                        in_first_rel = false;

                        /* For tuples not seen previously, do not make hashtable entry */
                        entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
                                                                                 NULL);

                        /* Advance the counts if entry is already present */
                        if (entry)
                                advance_counts((SetOpStatePerGroup) entry->additional, flag);
                }

                /* Must reset expression context after each hashtable lookup */
                ResetExprContext(econtext);
        }

        setopstate->table_filled = true;
        /* Initialize to walk the hash table */
        ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
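
/*
 * A small trace of the fill phase, assuming firstFlag == 0 for simplicity:
 * if the first input delivers the values 1, 1, 2 and the second delivers
 * 1, 3, 3, the table ends up with an entry for group 1 (numLeft = 2,
 * numRight = 1) and one for group 2 (numLeft = 1, numRight = 0); the
 * tuples with value 3 create no entry because their group never appeared
 * in the first input and so can never be output.
 */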

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
        TupleHashEntryData *entry;
        TupleTableSlot *resultTupleSlot;

        /*
         * get state info from node
         */
        resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

        /*
         * We loop retrieving groups until we find one we should return
         */
        while (!setopstate->setop_done)
        {
                CHECK_FOR_INTERRUPTS();

                /*
                 * Find the next entry in the hash table
                 */
                entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
                if (entry == NULL)
                {
                        /* No more entries in hashtable, so done */
                        setopstate->setop_done = true;
                        return NULL;
                }

                /*
                 * See if we should emit any copies of this tuple, and if so return
                 * the first copy.
                 */
                set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);

                if (setopstate->numOutput > 0)
                {
                        setopstate->numOutput--;
                        return ExecStoreMinimalTuple(entry->firstTuple,
                                                                                 resultTupleSlot,
                                                                                 false);
                }
        }

        /* No more groups */
        ExecClearTuple(resultTupleSlot);
        return NULL;
}

/* ----------------------------------------------------------------
 *              ExecInitSetOp
 *
 *              This initializes the setop node state structures and
 *              the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
        SetOpState *setopstate;
        TupleDesc       outerDesc;

        /* check for unsupported flags */
        Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

        /*
         * create state structure
         */
        setopstate = makeNode(SetOpState);
        setopstate->ps.plan = (Plan *) node;
        setopstate->ps.state = estate;
        setopstate->ps.ExecProcNode = ExecSetOp;

        setopstate->eqfuncoids = NULL;
        setopstate->hashfunctions = NULL;
        setopstate->setop_done = false;
        setopstate->numOutput = 0;
        setopstate->pergroup = NULL;
        setopstate->grp_firstTuple = NULL;
        setopstate->hashtable = NULL;
        setopstate->tableContext = NULL;

        /*
         * create expression context
         */
        ExecAssignExprContext(estate, &setopstate->ps);

        /*
         * If hashing, we also need a longer-lived context to store the hash
         * table.  The table can't just be kept in the per-query context because
         * we want to be able to throw it away in ExecReScanSetOp.
         */
        if (node->strategy == SETOP_HASHED)
                setopstate->tableContext =
                        AllocSetContextCreate(CurrentMemoryContext,
                                                                  "SetOp hash table",
                                                                  ALLOCSET_DEFAULT_SIZES);

        /*
         * initialize child nodes
         *
         * If we are hashing then the child plan does not need to handle REWIND
         * efficiently; see ExecReScanSetOp.
         */
        if (node->strategy == SETOP_HASHED)
                eflags &= ~EXEC_FLAG_REWIND;
        outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
        outerDesc = ExecGetResultType(outerPlanState(setopstate));

        /*
         * Initialize result slot and type. Setop nodes do no projections, so
         * initialize projection info for this node appropriately.
         */
        ExecInitResultTupleSlotTL(&setopstate->ps,
                                                          node->strategy == SETOP_HASHED ?
                                                          &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
        setopstate->ps.ps_ProjInfo = NULL;

        /*
         * Precompute fmgr lookup data for inner loop. We need both equality and
         * hashing functions to do it by hashing, but only equality if not
         * hashing.
         */
        if (node->strategy == SETOP_HASHED)
                execTuplesHashPrepare(node->numCols,
                                                          node->dupOperators,
                                                          &setopstate->eqfuncoids,
                                                          &setopstate->hashfunctions);
        else
                setopstate->eqfunction =
                        execTuplesMatchPrepare(outerDesc,
                                                                   node->numCols,
                                                                   node->dupColIdx,
                                                                   node->dupOperators,
                                                                   &setopstate->ps);

        if (node->strategy == SETOP_HASHED)
        {
                build_hash_table(setopstate);
                setopstate->table_filled = false;
        }
        else
        {
                setopstate->pergroup =
                        (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
        }

        return setopstate;
}

/* ----------------------------------------------------------------
 *              ExecEndSetOp
 *
 *              This shuts down the subplan and frees resources allocated
 *              to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
        /* clean up tuple table */
        ExecClearTuple(node->ps.ps_ResultTupleSlot);

        /* free subsidiary stuff including hashtable */
        if (node->tableContext)
                MemoryContextDelete(node->tableContext);
        ExecFreeExprContext(&node->ps);

        ExecEndNode(outerPlanState(node));
}


void
ExecReScanSetOp(SetOpState *node)
{
        ExecClearTuple(node->ps.ps_ResultTupleSlot);
        node->setop_done = false;
        node->numOutput = 0;

        if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
        {
                /*
                 * In the hashed case, if we haven't yet built the hash table then we
                 * can just return; nothing done yet, so nothing to undo. If subnode's
                 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
                 * else no reason to re-scan it at all.
                 */
                if (!node->table_filled)
                        return;

                /*
                 * If we do have the hash table and the subplan does not have any
                 * parameter changes, then we can just rescan the existing hash table;
                 * no need to build it again.
                 */
                if (node->ps.lefttree->chgParam == NULL)
                {
                        ResetTupleHashIterator(node->hashtable, &node->hashiter);
                        return;
                }
        }

        /* Release first tuple of group, if we have made a copy */
        if (node->grp_firstTuple != NULL)
        {
                heap_freetuple(node->grp_firstTuple);
                node->grp_firstTuple = NULL;
        }

        /* Release any hashtable storage */
        if (node->tableContext)
                MemoryContextResetAndDeleteChildren(node->tableContext);

        /* And rebuild empty hashtable if needed */
        if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
        {
                build_hash_table(node);
                node->table_filled = false;
        }

        /*
         * if chgParam of subnode is not null then plan will be re-scanned by
         * first ExecProcNode.
         */
        if (node->ps.lefttree->chgParam == NULL)
                ExecReScan(node->ps.lefttree);
}