granicus.if.org Git - postgresql/blob - src/backend/parser/parse_clause.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * parse_clause.c
   4  *        handle clauses in parser
   5  *
   6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.123 2003/09/25 06:58:01 petere Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include "access/heapam.h"
  19 #include "catalog/heap.h"
  20 #include "nodes/makefuncs.h"
  21 #include "optimizer/clauses.h"
  22 #include "optimizer/tlist.h"
  23 #include "optimizer/var.h"
  24 #include "parser/analyze.h"
  25 #include "parser/parsetree.h"
  26 #include "parser/parse_clause.h"
  27 #include "parser/parse_coerce.h"
  28 #include "parser/parse_expr.h"
  29 #include "parser/parse_oper.h"
  30 #include "parser/parse_relation.h"
  31 #include "parser/parse_target.h"
  32 #include "parser/parse_type.h"
  33 #include "rewrite/rewriteManip.h"
  34 #include "utils/builtins.h"
  35 #include "utils/guc.h"
  36
  37
  38 #define ORDER_CLAUSE 0
  39 #define GROUP_CLAUSE 1
  40 #define DISTINCT_ON_CLAUSE 2
  41
  42 static char *clauseText[] = {"ORDER BY", "GROUP BY", "DISTINCT ON"};
  43
  44 static void extractRemainingColumns(List *common_colnames,
  45                                                 List *src_colnames, List *src_colvars,
  46                                                 List **res_colnames, List **res_colvars);
  47 static Node *transformJoinUsingClause(ParseState *pstate,
  48                                                  List *leftVars, List *rightVars);
  49 static Node *transformJoinOnClause(ParseState *pstate, JoinExpr *j,
  50                                           List *containedRels);
  51 static RangeTblRef *transformTableEntry(ParseState *pstate, RangeVar *r);
  52 static RangeTblRef *transformRangeSubselect(ParseState *pstate,
  53                                                 RangeSubselect *r);
  54 static RangeTblRef *transformRangeFunction(ParseState *pstate,
  55                                            RangeFunction *r);
  56 static Node *transformFromClauseItem(ParseState *pstate, Node *n,
  57                                                 List **containedRels);
  58 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
  59                                    Var *l_colvar, Var *r_colvar);
  60 static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
  61                                         List *tlist, int clause);
  62
  63
  64 /*
  65  * transformFromClause -
  66  *        Process the FROM clause and add items to the query's range table,
  67  *        joinlist, and namespace.
  68  *
  69  * Note: we assume that pstate's p_rtable, p_joinlist, and p_namespace lists
  70  * were initialized to NIL when the pstate was created.  We will add onto
  71  * any entries already present --- this is needed for rule processing, as
  72  * well as for UPDATE and DELETE.
  73  *
  74  * The range table may grow still further when we transform the expressions
  75  * in the query's quals and target list. (This is possible because in
  76  * POSTQUEL, we allowed references to relations not specified in the
  77  * from-clause.  PostgreSQL keeps this extension to standard SQL.)
  78  */
  79 void
  80 transformFromClause(ParseState *pstate, List *frmList)
  81 {
  82         List       *fl;
  83
  84         /*
  85          * The grammar will have produced a list of RangeVars,
  86          * RangeSubselects, RangeFunctions, and/or JoinExprs. Transform each
  87          * one (possibly adding entries to the rtable), check for duplicate
  88          * refnames, and then add it to the joinlist and namespace.
  89          */
  90         foreach(fl, frmList)
  91         {
  92                 Node       *n = lfirst(fl);
  93                 List       *containedRels;
  94
  95                 n = transformFromClauseItem(pstate, n, &containedRels);
  96                 checkNameSpaceConflicts(pstate, (Node *) pstate->p_namespace, n);
  97                 pstate->p_joinlist = lappend(pstate->p_joinlist, n);
  98                 pstate->p_namespace = lappend(pstate->p_namespace, n);
  99         }
 100 }
 101
 102 /*
 103  * setTargetTable
 104  *        Add the target relation of INSERT/UPDATE/DELETE to the range table,
 105  *        and make the special links to it in the ParseState.
 106  *
 107  *        We also open the target relation and acquire a write lock on it.
 108  *        This must be done before processing the FROM list, in case the target
 109  *        is also mentioned as a source relation --- we want to be sure to grab
 110  *        the write lock before any read lock.
 111  *
 112  *        If alsoSource is true, add the target to the query's joinlist and
 113  *        namespace.  For INSERT, we don't want the target to be joined to;
 114  *        it's a destination of tuples, not a source.   For UPDATE/DELETE,
 115  *        we do need to scan or join the target.  (NOTE: we do not bother
 116  *        to check for namespace conflict; we assume that the namespace was
 117  *        initially empty in these cases.)
 118  *
 119  *        Returns the rangetable index of the target relation.
 120  */
 121 int
 122 setTargetTable(ParseState *pstate, RangeVar *relation,
 123                            bool inh, bool alsoSource)
 124 {
 125         RangeTblEntry *rte;
 126         int                     rtindex;
 127
 128         /* Close old target; this could only happen for multi-action rules */
 129         if (pstate->p_target_relation != NULL)
 130                 heap_close(pstate->p_target_relation, NoLock);
 131
 132         /*
 133          * Open target rel and grab suitable lock (which we will hold till end
 134          * of transaction).
 135          *
 136          * analyze.c will eventually do the corresponding heap_close(), but *not*
 137          * release the lock.
 138          */
 139         pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
 140
 141         /*
 142          * Now build an RTE.
 143          */
 144         rte = addRangeTableEntry(pstate, relation, NULL, inh, false);
 145         pstate->p_target_rangetblentry = rte;
 146
 147         /* assume new rte is at end */
 148         rtindex = length(pstate->p_rtable);
 149         Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 150
 151         /*
 152          * Override addRangeTableEntry's default checkForRead, and instead
 153          * mark target table as requiring write access.
 154          *
 155          * If we find an explicit reference to the rel later during parse
 156          * analysis, scanRTEForColumn will change checkForRead to 'true'
 157          * again.  That can't happen for INSERT but it is possible for UPDATE
 158          * and DELETE.
 159          */
 160         rte->checkForRead = false;
 161         rte->checkForWrite = true;
 162
 163         /*
 164          * If UPDATE/DELETE, add table to joinlist and namespace.
 165          */
 166         if (alsoSource)
 167                 addRTEtoQuery(pstate, rte, true, true);
 168
 169         return rtindex;
 170 }
 171
 172 /*
 173  * Simplify InhOption (yes/no/default) into boolean yes/no.
 174  *
 175  * The reason we do things this way is that we don't want to examine the
 176  * SQL_inheritance option flag until parse_analyze is run.      Otherwise,
 177  * we'd do the wrong thing with query strings that intermix SET commands
 178  * with queries.
 179  */
 180 bool
 181 interpretInhOption(InhOption inhOpt)
 182 {
 183         switch (inhOpt)
 184         {
 185                 case INH_NO:
 186                         return false;
 187                 case INH_YES:
 188                         return true;
 189                 case INH_DEFAULT:
 190                         return SQL_inheritance;
 191         }
 192         elog(ERROR, "bogus InhOption value");
 193         return false;                           /* keep compiler quiet */
 194 }
 195
 196 /*
 197  * Extract all not-in-common columns from column lists of a source table
 198  */
 199 static void
 200 extractRemainingColumns(List *common_colnames,
 201                                                 List *src_colnames, List *src_colvars,
 202                                                 List **res_colnames, List **res_colvars)
 203 {
 204         List       *new_colnames = NIL;
 205         List       *new_colvars = NIL;
 206         List       *lnames,
 207                            *lvars = src_colvars;
 208
 209         foreach(lnames, src_colnames)
 210         {
 211                 char       *colname = strVal(lfirst(lnames));
 212                 bool            match = false;
 213                 List       *cnames;
 214
 215                 foreach(cnames, common_colnames)
 216                 {
 217                         char       *ccolname = strVal(lfirst(cnames));
 218
 219                         if (strcmp(colname, ccolname) == 0)
 220                         {
 221                                 match = true;
 222                                 break;
 223                         }
 224                 }
 225
 226                 if (!match)
 227                 {
 228                         new_colnames = lappend(new_colnames, lfirst(lnames));
 229                         new_colvars = lappend(new_colvars, lfirst(lvars));
 230                 }
 231
 232                 lvars = lnext(lvars);
 233         }
 234
 235         *res_colnames = new_colnames;
 236         *res_colvars = new_colvars;
 237 }
 238
 239 /* transformJoinUsingClause()
 240  *        Build a complete ON clause from a partially-transformed USING list.
 241  *        We are given lists of nodes representing left and right match columns.
 242  *        Result is a transformed qualification expression.
 243  */
 244 static Node *
 245 transformJoinUsingClause(ParseState *pstate, List *leftVars, List *rightVars)
 246 {
 247         Node       *result = NULL;
 248         List       *lvars,
 249                            *rvars = rightVars;
 250
 251         /*
 252          * We cheat a little bit here by building an untransformed operator
 253          * tree whose leaves are the already-transformed Vars.  This is OK
 254          * because transformExpr() won't complain about already-transformed
 255          * subnodes.
 256          */
 257         foreach(lvars, leftVars)
 258         {
 259                 Node       *lvar = (Node *) lfirst(lvars);
 260                 Node       *rvar = (Node *) lfirst(rvars);
 261                 A_Expr     *e;
 262
 263                 e = makeSimpleA_Expr(AEXPR_OP, "=", copyObject(lvar), copyObject(rvar));
 264
 265                 if (result == NULL)
 266                         result = (Node *) e;
 267                 else
 268                 {
 269                         A_Expr     *a;
 270
 271                         a = makeA_Expr(AEXPR_AND, NIL, result, (Node *) e);
 272                         result = (Node *) a;
 273                 }
 274
 275                 rvars = lnext(rvars);
 276         }
 277
 278         /*
 279          * Since the references are already Vars, and are certainly from the
 280          * input relations, we don't have to go through the same pushups that
 281          * transformJoinOnClause() does.  Just invoke transformExpr() to fix
 282          * up the operators, and we're done.
 283          */
 284         result = transformExpr(pstate, result);
 285
 286         result = coerce_to_boolean(pstate, result, "JOIN/USING");
 287
 288         return result;
 289 }       /* transformJoinUsingClause() */
 290
 291 /* transformJoinOnClause()
 292  *        Transform the qual conditions for JOIN/ON.
 293  *        Result is a transformed qualification expression.
 294  */
 295 static Node *
 296 transformJoinOnClause(ParseState *pstate, JoinExpr *j,
 297                                           List *containedRels)
 298 {
 299         Node       *result;
 300         List       *save_namespace;
 301         Relids          clause_varnos;
 302         int                     varno;
 303
 304         /*
 305          * This is a tad tricky, for two reasons.  First, the namespace that
 306          * the join expression should see is just the two subtrees of the JOIN
 307          * plus any outer references from upper pstate levels.  So,
 308          * temporarily set this pstate's namespace accordingly.  (We need not
 309          * check for refname conflicts, because transformFromClauseItem()
 310          * already did.) NOTE: this code is OK only because the ON clause
 311          * can't legally alter the namespace by causing implicit relation refs
 312          * to be added.
 313          */
 314         save_namespace = pstate->p_namespace;
 315         pstate->p_namespace = makeList2(j->larg, j->rarg);
 316
 317         result = transformWhereClause(pstate, j->quals, "JOIN/ON");
 318
 319         pstate->p_namespace = save_namespace;
 320
 321         /*
 322          * Second, we need to check that the ON condition doesn't refer to any
 323          * rels outside the input subtrees of the JOIN.  It could do that
 324          * despite our hack on the namespace if it uses fully-qualified names.
 325          * So, grovel through the transformed clause and make sure there are
 326          * no bogus references.  (Outer references are OK, and are ignored
 327          * here.)
 328          */
 329         clause_varnos = pull_varnos(result);
 330         while ((varno = bms_first_member(clause_varnos)) >= 0)
 331         {
 332                 if (!intMember(varno, containedRels))
 333                 {
 334                         ereport(ERROR,
 335                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 336                                          errmsg("JOIN/ON clause refers to \"%s\", which is not part of JOIN",
 337                                    rt_fetch(varno, pstate->p_rtable)->eref->aliasname)));
 338                 }
 339         }
 340         bms_free(clause_varnos);
 341
 342         return result;
 343 }
 344
 345 /*
 346  * transformTableEntry --- transform a RangeVar (simple relation reference)
 347  */
 348 static RangeTblRef *
 349 transformTableEntry(ParseState *pstate, RangeVar *r)
 350 {
 351         RangeTblEntry *rte;
 352         RangeTblRef *rtr;
 353
 354         /*
 355          * mark this entry to indicate it comes from the FROM clause. In SQL,
 356          * the target list can only refer to range variables specified in the
 357          * from clause but we follow the more powerful POSTQUEL semantics and
 358          * automatically generate the range variable if not specified. However
 359          * there are times we need to know whether the entries are legitimate.
 360          */
 361         rte = addRangeTableEntry(pstate, r, r->alias,
 362                                                          interpretInhOption(r->inhOpt), true);
 363
 364         /*
 365          * We create a RangeTblRef, but we do not add it to the joinlist or
 366          * namespace; our caller must do that if appropriate.
 367          */
 368         rtr = makeNode(RangeTblRef);
 369         /* assume new rte is at end */
 370         rtr->rtindex = length(pstate->p_rtable);
 371         Assert(rte == rt_fetch(rtr->rtindex, pstate->p_rtable));
 372
 373         return rtr;
 374 }
 375
 376
 377 /*
 378  * transformRangeSubselect --- transform a sub-SELECT appearing in FROM
 379  */
 380 static RangeTblRef *
 381 transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
 382 {
 383         List       *parsetrees;
 384         Query      *query;
 385         RangeTblEntry *rte;
 386         RangeTblRef *rtr;
 387
 388         /*
 389          * We require user to supply an alias for a subselect, per SQL92. To
 390          * relax this, we'd have to be prepared to gin up a unique alias for
 391          * an unlabeled subselect.
 392          */
 393         if (r->alias == NULL)
 394                 ereport(ERROR,
 395                                 (errcode(ERRCODE_SYNTAX_ERROR),
 396                                  errmsg("subquery in FROM must have an alias")));
 397
 398         /*
 399          * Analyze and transform the subquery.
 400          */
 401         parsetrees = parse_sub_analyze(r->subquery, pstate);
 402
 403         /*
 404          * Check that we got something reasonable.      Most of these conditions
 405          * are probably impossible given restrictions of the grammar, but
 406          * check 'em anyway.
 407          */
 408         if (length(parsetrees) != 1)
 409                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
 410         query = (Query *) lfirst(parsetrees);
 411         if (query == NULL || !IsA(query, Query))
 412                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
 413
 414         if (query->commandType != CMD_SELECT)
 415                 elog(ERROR, "expected SELECT query from subquery in FROM");
 416         if (query->resultRelation != 0 || query->into != NULL)
 417                 ereport(ERROR,
 418                                 (errcode(ERRCODE_SYNTAX_ERROR),
 419                                  errmsg("subquery in FROM may not have SELECT INTO")));
 420
 421         /*
 422          * The subquery cannot make use of any variables from FROM items
 423          * created earlier in the current query.  Per SQL92, the scope of a
 424          * FROM item does not include other FROM items.  Formerly we hacked
 425          * the namespace so that the other variables weren't even visible, but
 426          * it seems more useful to leave them visible and give a specific
 427          * error message.
 428          *
 429          * XXX this will need further work to support SQL99's LATERAL() feature,
 430          * wherein such references would indeed be legal.
 431          *
 432          * We can skip groveling through the subquery if there's not anything
 433          * visible in the current query.  Also note that outer references are
 434          * OK.
 435          */
 436         if (pstate->p_namespace)
 437         {
 438                 if (contain_vars_of_level((Node *) query, 1))
 439                         ereport(ERROR,
 440                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 441                                          errmsg("subquery in FROM may not refer to other relations of same query level")));
 442         }
 443
 444         /*
 445          * OK, build an RTE for the subquery.
 446          */
 447         rte = addRangeTableEntryForSubquery(pstate, query, r->alias, true);
 448
 449         /*
 450          * We create a RangeTblRef, but we do not add it to the joinlist or
 451          * namespace; our caller must do that if appropriate.
 452          */
 453         rtr = makeNode(RangeTblRef);
 454         /* assume new rte is at end */
 455         rtr->rtindex = length(pstate->p_rtable);
 456         Assert(rte == rt_fetch(rtr->rtindex, pstate->p_rtable));
 457
 458         return rtr;
 459 }
 460
 461
 462 /*
 463  * transformRangeFunction --- transform a function call appearing in FROM
 464  */
 465 static RangeTblRef *
 466 transformRangeFunction(ParseState *pstate, RangeFunction *r)
 467 {
 468         Node       *funcexpr;
 469         char       *funcname;
 470         RangeTblEntry *rte;
 471         RangeTblRef *rtr;
 472
 473         /* Get function name for possible use as alias */
 474         Assert(IsA(r->funccallnode, FuncCall));
 475         funcname = strVal(llast(((FuncCall *) r->funccallnode)->funcname));
 476
 477         /*
 478          * Transform the raw FuncCall node.
 479          */
 480         funcexpr = transformExpr(pstate, r->funccallnode);
 481
 482         /*
 483          * The function parameters cannot make use of any variables from other
 484          * FROM items.  (Compare to transformRangeSubselect(); the coding is
 485          * different though because we didn't parse as a sub-select with its
 486          * own level of namespace.)
 487          *
 488          * XXX this will need further work to support SQL99's LATERAL() feature,
 489          * wherein such references would indeed be legal.
 490          */
 491         if (pstate->p_namespace)
 492         {
 493                 if (contain_vars_of_level(funcexpr, 0))
 494                         ereport(ERROR,
 495                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 496                                          errmsg("function expression in FROM may not refer to other relations of same query level")));
 497         }
 498
 499         /*
 500          * Disallow aggregate functions in the expression.      (No reason to
 501          * postpone this check until parseCheckAggregates.)
 502          */
 503         if (pstate->p_hasAggs)
 504         {
 505                 if (checkExprHasAggs(funcexpr))
 506                         ereport(ERROR,
 507                                         (errcode(ERRCODE_GROUPING_ERROR),
 508                                          errmsg("cannot use aggregate function in function expression in FROM")));
 509         }
 510
 511         /*
 512          * If a coldeflist is supplied, ensure it defines a legal set of names
 513          * (no duplicates) and datatypes (no pseudo-types, for instance).
 514          */
 515         if (r->coldeflist)
 516         {
 517                 TupleDesc       tupdesc;
 518
 519                 tupdesc = BuildDescForRelation(r->coldeflist);
 520                 CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE);
 521         }
 522
 523         /*
 524          * OK, build an RTE for the function.
 525          */
 526         rte = addRangeTableEntryForFunction(pstate, funcname, funcexpr,
 527                                                                                 r, true);
 528
 529         /*
 530          * We create a RangeTblRef, but we do not add it to the joinlist or
 531          * namespace; our caller must do that if appropriate.
 532          */
 533         rtr = makeNode(RangeTblRef);
 534         /* assume new rte is at end */
 535         rtr->rtindex = length(pstate->p_rtable);
 536         Assert(rte == rt_fetch(rtr->rtindex, pstate->p_rtable));
 537
 538         return rtr;
 539 }
 540
 541
 542 /*
 543  * transformFromClauseItem -
 544  *        Transform a FROM-clause item, adding any required entries to the
 545  *        range table list being built in the ParseState, and return the
 546  *        transformed item ready to include in the joinlist and namespace.
 547  *        This routine can recurse to handle SQL92 JOIN expressions.
 548  *
 549  *        Aside from the primary return value (the transformed joinlist item)
 550  *        this routine also returns an integer list of the rangetable indexes
 551  *        of all the base and join relations represented in the joinlist item.
 552  *        This list is needed for checking JOIN/ON conditions in higher levels.
 553  */
 554 static Node *
 555 transformFromClauseItem(ParseState *pstate, Node *n, List **containedRels)
 556 {
 557         if (IsA(n, RangeVar))
 558         {
 559                 /* Plain relation reference */
 560                 RangeTblRef *rtr;
 561
 562                 rtr = transformTableEntry(pstate, (RangeVar *) n);
 563                 *containedRels = makeListi1(rtr->rtindex);
 564                 return (Node *) rtr;
 565         }
 566         else if (IsA(n, RangeSubselect))
 567         {
 568                 /* sub-SELECT is like a plain relation */
 569                 RangeTblRef *rtr;
 570
 571                 rtr = transformRangeSubselect(pstate, (RangeSubselect *) n);
 572                 *containedRels = makeListi1(rtr->rtindex);
 573                 return (Node *) rtr;
 574         }
 575         else if (IsA(n, RangeFunction))
 576         {
 577                 /* function is like a plain relation */
 578                 RangeTblRef *rtr;
 579
 580                 rtr = transformRangeFunction(pstate, (RangeFunction *) n);
 581                 *containedRels = makeListi1(rtr->rtindex);
 582                 return (Node *) rtr;
 583         }
 584         else if (IsA(n, JoinExpr))
 585         {
 586                 /* A newfangled join expression */
 587                 JoinExpr   *j = (JoinExpr *) n;
 588                 List       *my_containedRels,
 589                                    *l_containedRels,
 590                                    *r_containedRels,
 591                                    *l_colnames,
 592                                    *r_colnames,
 593                                    *res_colnames,
 594                                    *l_colvars,
 595                                    *r_colvars,
 596                                    *res_colvars;
 597                 Index           leftrti,
 598                                         rightrti;
 599                 RangeTblEntry *rte;
 600
 601                 /*
 602                  * Recursively process the left and right subtrees
 603                  */
 604                 j->larg = transformFromClauseItem(pstate, j->larg, &l_containedRels);
 605                 j->rarg = transformFromClauseItem(pstate, j->rarg, &r_containedRels);
 606
 607                 /*
 608                  * Generate combined list of relation indexes for possible use by
 609                  * transformJoinOnClause below.
 610                  */
 611                 my_containedRels = nconc(l_containedRels, r_containedRels);
 612
 613                 /*
 614                  * Check for conflicting refnames in left and right subtrees. Must
 615                  * do this because higher levels will assume I hand back a self-
 616                  * consistent namespace subtree.
 617                  */
 618                 checkNameSpaceConflicts(pstate, j->larg, j->rarg);
 619
 620                 /*
 621                  * Extract column name and var lists from both subtrees
 622                  *
 623                  * Note: expandRTE returns new lists, safe for me to modify
 624                  */
 625                 if (IsA(j->larg, RangeTblRef))
 626                         leftrti = ((RangeTblRef *) j->larg)->rtindex;
 627                 else if (IsA(j->larg, JoinExpr))
 628                         leftrti = ((JoinExpr *) j->larg)->rtindex;
 629                 else
 630                 {
 631                         elog(ERROR, "unrecognized node type: %d", (int) nodeTag(j->larg));
 632                         leftrti = 0;            /* keep compiler quiet */
 633                 }
 634                 rte = rt_fetch(leftrti, pstate->p_rtable);
 635                 expandRTE(pstate, rte, &l_colnames, &l_colvars);
 636
 637                 if (IsA(j->rarg, RangeTblRef))
 638                         rightrti = ((RangeTblRef *) j->rarg)->rtindex;
 639                 else if (IsA(j->rarg, JoinExpr))
 640                         rightrti = ((JoinExpr *) j->rarg)->rtindex;
 641                 else
 642                 {
 643                         elog(ERROR, "unrecognized node type: %d", (int) nodeTag(j->rarg));
 644                         rightrti = 0;           /* keep compiler quiet */
 645                 }
 646                 rte = rt_fetch(rightrti, pstate->p_rtable);
 647                 expandRTE(pstate, rte, &r_colnames, &r_colvars);
 648
 649                 /*
 650                  * Natural join does not explicitly specify columns; must generate
 651                  * columns to join. Need to run through the list of columns from
 652                  * each table or join result and match up the column names. Use
 653                  * the first table, and check every column in the second table for
 654                  * a match.  (We'll check that the matches were unique later on.)
 655                  * The result of this step is a list of column names just like an
 656                  * explicitly-written USING list.
 657                  */
 658                 if (j->isNatural)
 659                 {
 660                         List       *rlist = NIL;
 661                         List       *lx,
 662                                            *rx;
 663
 664                         Assert(j->using == NIL);        /* shouldn't have USING() too */
 665
 666                         foreach(lx, l_colnames)
 667                         {
 668                                 char       *l_colname = strVal(lfirst(lx));
 669                                 Value      *m_name = NULL;
 670
 671                                 foreach(rx, r_colnames)
 672                                 {
 673                                         char       *r_colname = strVal(lfirst(rx));
 674
 675                                         if (strcmp(l_colname, r_colname) == 0)
 676                                         {
 677                                                 m_name = makeString(l_colname);
 678                                                 break;
 679                                         }
 680                                 }
 681
 682                                 /* matched a right column? then keep as join column... */
 683                                 if (m_name != NULL)
 684                                         rlist = lappend(rlist, m_name);
 685                         }
 686
 687                         j->using = rlist;
 688                 }
 689
 690                 /*
 691                  * Now transform the join qualifications, if any.
 692                  */
 693                 res_colnames = NIL;
 694                 res_colvars = NIL;
 695
 696                 if (j->using)
 697                 {
 698                         /*
 699                          * JOIN/USING (or NATURAL JOIN, as transformed above).
 700                          * Transform the list into an explicit ON-condition, and
 701                          * generate a list of merged result columns.
 702                          */
 703                         List       *ucols = j->using;
 704                         List       *l_usingvars = NIL;
 705                         List       *r_usingvars = NIL;
 706                         List       *ucol;
 707
 708                         Assert(j->quals == NULL);       /* shouldn't have ON() too */
 709
 710                         foreach(ucol, ucols)
 711                         {
 712                                 char       *u_colname = strVal(lfirst(ucol));
 713                                 List       *col;
 714                                 int                     ndx;
 715                                 int                     l_index = -1;
 716                                 int                     r_index = -1;
 717                                 Var                *l_colvar,
 718                                                    *r_colvar;
 719
 720                                 /* Check for USING(foo,foo) */
 721                                 foreach(col, res_colnames)
 722                                 {
 723                                         char       *res_colname = strVal(lfirst(col));
 724
 725                                         if (strcmp(res_colname, u_colname) == 0)
 726                                                 ereport(ERROR,
 727                                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
 728                                                                  errmsg("column name \"%s\" appears more than once in USING clause",
 729                                                                                 u_colname)));
 730                                 }
 731
 732                                 /* Find it in left input */
 733                                 ndx = 0;
 734                                 foreach(col, l_colnames)
 735                                 {
 736                                         char       *l_colname = strVal(lfirst(col));
 737
 738                                         if (strcmp(l_colname, u_colname) == 0)
 739                                         {
 740                                                 if (l_index >= 0)
 741                                                         ereport(ERROR,
 742                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
 743                                                                          errmsg("common column name \"%s\" appears more than once in left table",
 744                                                                                         u_colname)));
 745                                                 l_index = ndx;
 746                                         }
 747                                         ndx++;
 748                                 }
 749                                 if (l_index < 0)
 750                                         ereport(ERROR,
 751                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 752                                                          errmsg("column \"%s\" specified in USING clause not found in left table",
 753                                                                         u_colname)));
 754
 755                                 /* Find it in right input */
 756                                 ndx = 0;
 757                                 foreach(col, r_colnames)
 758                                 {
 759                                         char       *r_colname = strVal(lfirst(col));
 760
 761                                         if (strcmp(r_colname, u_colname) == 0)
 762                                         {
 763                                                 if (r_index >= 0)
 764                                                         ereport(ERROR,
 765                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
 766                                                                          errmsg("common column name \"%s\" appears more than once in right table",
 767                                                                                         u_colname)));
 768                                                 r_index = ndx;
 769                                         }
 770                                         ndx++;
 771                                 }
 772                                 if (r_index < 0)
 773                                         ereport(ERROR,
 774                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 775                                                          errmsg("column \"%s\" specified in USING clause not found in right table",
 776                                                                         u_colname)));
 777
 778                                 l_colvar = nth(l_index, l_colvars);
 779                                 l_usingvars = lappend(l_usingvars, l_colvar);
 780                                 r_colvar = nth(r_index, r_colvars);
 781                                 r_usingvars = lappend(r_usingvars, r_colvar);
 782
 783                                 res_colnames = lappend(res_colnames, lfirst(ucol));
 784                                 res_colvars = lappend(res_colvars,
 785                                                                           buildMergedJoinVar(pstate,
 786                                                                                                                  j->jointype,
 787                                                                                                                  l_colvar,
 788                                                                                                                  r_colvar));
 789                         }
 790
 791                         j->quals = transformJoinUsingClause(pstate,
 792                                                                                                 l_usingvars,
 793                                                                                                 r_usingvars);
 794                 }
 795                 else if (j->quals)
 796                 {
 797                         /* User-written ON-condition; transform it */
 798                         j->quals = transformJoinOnClause(pstate, j, my_containedRels);
 799                 }
 800                 else
 801                 {
 802                         /* CROSS JOIN: no quals */
 803                 }
 804
 805                 /* Add remaining columns from each side to the output columns */
 806                 extractRemainingColumns(res_colnames,
 807                                                                 l_colnames, l_colvars,
 808                                                                 &l_colnames, &l_colvars);
 809                 extractRemainingColumns(res_colnames,
 810                                                                 r_colnames, r_colvars,
 811                                                                 &r_colnames, &r_colvars);
 812                 res_colnames = nconc(res_colnames, l_colnames);
 813                 res_colvars = nconc(res_colvars, l_colvars);
 814                 res_colnames = nconc(res_colnames, r_colnames);
 815                 res_colvars = nconc(res_colvars, r_colvars);
 816
 817                 /*
 818                  * Check alias (AS clause), if any.
 819                  */
 820                 if (j->alias)
 821                 {
 822                         if (j->alias->colnames != NIL)
 823                         {
 824                                 if (length(j->alias->colnames) > length(res_colnames))
 825                                         ereport(ERROR,
 826                                                         (errcode(ERRCODE_SYNTAX_ERROR),
 827                                                          errmsg("column alias list for \"%s\" has too many entries",
 828                                                                         j->alias->aliasname)));
 829                         }
 830                 }
 831
 832                 /*
 833                  * Now build an RTE for the result of the join
 834                  */
 835                 rte = addRangeTableEntryForJoin(pstate,
 836                                                                                 res_colnames,
 837                                                                                 j->jointype,
 838                                                                                 res_colvars,
 839                                                                                 j->alias,
 840                                                                                 true);
 841
 842                 /* assume new rte is at end */
 843                 j->rtindex = length(pstate->p_rtable);
 844                 Assert(rte == rt_fetch(j->rtindex, pstate->p_rtable));
 845
 846                 /*
 847                  * Include join RTE in returned containedRels list
 848                  */
 849                 *containedRels = lconsi(j->rtindex, my_containedRels);
 850
 851                 return (Node *) j;
 852         }
 853         else
 854                 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
 855         return NULL;                            /* can't get here, keep compiler quiet */
 856 }
 857
 858 /*
 859  * buildMergedJoinVar -
 860  *        generate a suitable replacement expression for a merged join column
 861  */
 862 static Node *
 863 buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 864                                    Var *l_colvar, Var *r_colvar)
 865 {
 866         Oid                     outcoltype;
 867         int32           outcoltypmod;
 868         Node       *l_node,
 869                            *r_node,
 870                            *res_node;
 871
 872         /*
 873          * Choose output type if input types are dissimilar.
 874          */
 875         outcoltype = l_colvar->vartype;
 876         outcoltypmod = l_colvar->vartypmod;
 877         if (outcoltype != r_colvar->vartype)
 878         {
 879                 outcoltype = select_common_type(makeListo2(l_colvar->vartype,
 880                                                                                                    r_colvar->vartype),
 881                                                                                 "JOIN/USING");
 882                 outcoltypmod = -1;              /* ie, unknown */
 883         }
 884         else if (outcoltypmod != r_colvar->vartypmod)
 885         {
 886                 /* same type, but not same typmod */
 887                 outcoltypmod = -1;              /* ie, unknown */
 888         }
 889
 890         /*
 891          * Insert coercion functions if needed.  Note that a difference in
 892          * typmod can only happen if input has typmod but outcoltypmod is -1.
 893          * In that case we insert a RelabelType to clearly mark that result's
 894          * typmod is not same as input.
 895          */
 896         if (l_colvar->vartype != outcoltype)
 897                 l_node = coerce_type(pstate, (Node *) l_colvar, l_colvar->vartype,
 898                                                          outcoltype,
 899                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
 900         else if (l_colvar->vartypmod != outcoltypmod)
 901                 l_node = (Node *) makeRelabelType((Expr *) l_colvar,
 902                                                                                   outcoltype, outcoltypmod,
 903                                                                                   COERCE_IMPLICIT_CAST);
 904         else
 905                 l_node = (Node *) l_colvar;
 906
 907         if (r_colvar->vartype != outcoltype)
 908                 r_node = coerce_type(pstate, (Node *) r_colvar, r_colvar->vartype,
 909                                                          outcoltype,
 910                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
 911         else if (r_colvar->vartypmod != outcoltypmod)
 912                 r_node = (Node *) makeRelabelType((Expr *) r_colvar,
 913                                                                                   outcoltype, outcoltypmod,
 914                                                                                   COERCE_IMPLICIT_CAST);
 915         else
 916                 r_node = (Node *) r_colvar;
 917
 918         /*
 919          * Choose what to emit
 920          */
 921         switch (jointype)
 922         {
 923                 case JOIN_INNER:
 924
 925                         /*
 926                          * We can use either var; prefer non-coerced one if available.
 927                          */
 928                         if (IsA(l_node, Var))
 929                                 res_node = l_node;
 930                         else if (IsA(r_node, Var))
 931                                 res_node = r_node;
 932                         else
 933                                 res_node = l_node;
 934                         break;
 935                 case JOIN_LEFT:
 936                         /* Always use left var */
 937                         res_node = l_node;
 938                         break;
 939                 case JOIN_RIGHT:
 940                         /* Always use right var */
 941                         res_node = r_node;
 942                         break;
 943                 case JOIN_FULL:
 944                         {
 945                                 /*
 946                                  * Here we must build a COALESCE expression to ensure that
 947                                  * the join output is non-null if either input is.
 948                                  */
 949                                 CoalesceExpr *c = makeNode(CoalesceExpr);
 950
 951                                 c->coalescetype = outcoltype;
 952                                 c->args = makeList2(l_node, r_node);
 953                                 res_node = (Node *) c;
 954                                 break;
 955                         }
 956                 default:
 957                         elog(ERROR, "unrecognized join type: %d", (int) jointype);
 958                         res_node = NULL;        /* keep compiler quiet */
 959                         break;
 960         }
 961
 962         return res_node;
 963 }
 964
 965
 966 /*
 967  * transformWhereClause -
 968  *        Transform the qualification and make sure it is of type boolean.
 969  *        Used for WHERE and allied clauses.
 970  *
 971  * constructName does not affect the semantics, but is used in error messages
 972  */
 973 Node *
 974 transformWhereClause(ParseState *pstate, Node *clause,
 975                                          const char *constructName)
 976 {
 977         Node       *qual;
 978
 979         if (clause == NULL)
 980                 return NULL;
 981
 982         qual = transformExpr(pstate, clause);
 983
 984         qual = coerce_to_boolean(pstate, qual, constructName);
 985
 986         return qual;
 987 }
 988
 989
 990 /*
 991  * transformLimitClause -
 992  *        Transform the expression and make sure it is of type integer.
 993  *        Used for LIMIT and allied clauses.
 994  *
 995  * constructName does not affect the semantics, but is used in error messages
 996  */
 997 Node *
 998 transformLimitClause(ParseState *pstate, Node *clause,
 999                                          const char *constructName)
1000 {
1001         Node       *qual;
1002
1003         if (clause == NULL)
1004                 return NULL;
1005
1006         qual = transformExpr(pstate, clause);
1007
1008         qual = coerce_to_integer(pstate, qual, constructName);
1009
1010         /*
1011          * LIMIT can't refer to any vars or aggregates of the current query;
1012          * we don't allow subselects either (though that case would at least
1013          * be sensible)
1014          */
1015         if (contain_vars_of_level(qual, 0))
1016         {
1017                 ereport(ERROR,
1018                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1019                 /* translator: %s is name of a SQL construct, eg LIMIT */
1020                                  errmsg("argument of %s must not contain variables",
1021                                                 constructName)));
1022         }
1023         if (checkExprHasAggs(qual))
1024         {
1025                 ereport(ERROR,
1026                                 (errcode(ERRCODE_GROUPING_ERROR),
1027                 /* translator: %s is name of a SQL construct, eg LIMIT */
1028                                  errmsg("argument of %s must not contain aggregates",
1029                                                 constructName)));
1030         }
1031         if (contain_subplans(qual))
1032         {
1033                 ereport(ERROR,
1034                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1035                 /* translator: %s is name of a SQL construct, eg LIMIT */
1036                                  errmsg("argument of %s must not contain subqueries",
1037                                                 constructName)));
1038         }
1039
1040         return qual;
1041 }
1042
1043
1044 /*
1045  *      findTargetlistEntry -
1046  *        Returns the targetlist entry matching the given (untransformed) node.
1047  *        If no matching entry exists, one is created and appended to the target
1048  *        list as a "resjunk" node.
1049  *
1050  * node         the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
1051  * tlist        the existing target list (NB: this will never be NIL, which is a
1052  *                      good thing since we'd be unable to append to it if it were...)
1053  * clause       identifies clause type being processed.
1054  */
1055 static TargetEntry *
1056 findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause)
1057 {
1058         TargetEntry *target_result = NULL;
1059         List       *tl;
1060         Node       *expr;
1061
1062         /*----------
1063          * Handle two special cases as mandated by the SQL92 spec:
1064          *
1065          * 1. Bare ColumnName (no qualifier or subscripts)
1066          *        For a bare identifier, we search for a matching column name
1067          *        in the existing target list.  Multiple matches are an error
1068          *        unless they refer to identical values; for example,
1069          *        we allow      SELECT a, a FROM table ORDER BY a
1070          *        but not       SELECT a AS b, b FROM table ORDER BY b
1071          *        If no match is found, we fall through and treat the identifier
1072          *        as an expression.
1073          *        For GROUP BY, it is incorrect to match the grouping item against
1074          *        targetlist entries: according to SQL92, an identifier in GROUP BY
1075          *        is a reference to a column name exposed by FROM, not to a target
1076          *        list column.  However, many implementations (including pre-7.0
1077          *        PostgreSQL) accept this anyway.  So for GROUP BY, we look first
1078          *        to see if the identifier matches any FROM column name, and only
1079          *        try for a targetlist name if it doesn't.  This ensures that we
1080          *        adhere to the spec in the case where the name could be both.
1081          *        DISTINCT ON isn't in the standard, so we can do what we like there;
1082          *        we choose to make it work like ORDER BY, on the rather flimsy
1083          *        grounds that ordinary DISTINCT works on targetlist entries.
1084          *
1085          * 2. IntegerConstant
1086          *        This means to use the n'th item in the existing target list.
1087          *        Note that it would make no sense to order/group/distinct by an
1088          *        actual constant, so this does not create a conflict with our
1089          *        extension to order/group by an expression.
1090          *        GROUP BY column-number is not allowed by SQL92, but since
1091          *        the standard has no other behavior defined for this syntax,
1092          *        we may as well accept this common extension.
1093          *
1094          * Note that pre-existing resjunk targets must not be used in either case,
1095          * since the user didn't write them in his SELECT list.
1096          *
1097          * If neither special case applies, fall through to treat the item as
1098          * an expression.
1099          *----------
1100          */
1101         if (IsA(node, ColumnRef) &&
1102                 length(((ColumnRef *) node)->fields) == 1 &&
1103                 ((ColumnRef *) node)->indirection == NIL)
1104         {
1105                 char       *name = strVal(lfirst(((ColumnRef *) node)->fields));
1106
1107                 if (clause == GROUP_CLAUSE)
1108                 {
1109                         /*
1110                          * In GROUP BY, we must prefer a match against a FROM-clause
1111                          * column to one against the targetlist.  Look to see if there
1112                          * is a matching column.  If so, fall through to let
1113                          * transformExpr() do the rest.  NOTE: if name could refer
1114                          * ambiguously to more than one column name exposed by FROM,
1115                          * colnameToVar will ereport(ERROR).  That's just what we want
1116                          * here.
1117                          */
1118                         if (colnameToVar(pstate, name) != NULL)
1119                                 name = NULL;
1120                 }
1121
1122                 if (name != NULL)
1123                 {
1124                         foreach(tl, tlist)
1125                         {
1126                                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1127                                 Resdom     *resnode = tle->resdom;
1128
1129                                 if (!resnode->resjunk &&
1130                                         strcmp(resnode->resname, name) == 0)
1131                                 {
1132                                         if (target_result != NULL)
1133                                         {
1134                                                 if (!equal(target_result->expr, tle->expr))
1135                                                         ereport(ERROR,
1136                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
1137                                                                          /* translator: first %s is name of a SQL construct, eg ORDER BY */
1138                                                                          errmsg("%s \"%s\" is ambiguous",
1139                                                                                         clauseText[clause], name)));
1140                                         }
1141                                         else
1142                                                 target_result = tle;
1143                                         /* Stay in loop to check for ambiguity */
1144                                 }
1145                         }
1146                         if (target_result != NULL)
1147                                 return target_result;   /* return the first match */
1148                 }
1149         }
1150         if (IsA(node, A_Const))
1151         {
1152                 Value      *val = &((A_Const *) node)->val;
1153                 int                     targetlist_pos = 0;
1154                 int                     target_pos;
1155
1156                 if (!IsA(val, Integer))
1157                         ereport(ERROR,
1158                                         (errcode(ERRCODE_SYNTAX_ERROR),
1159                         /* translator: %s is name of a SQL construct, eg ORDER BY */
1160                                          errmsg("non-integer constant in %s",
1161                                                         clauseText[clause])));
1162                 target_pos = intVal(val);
1163                 foreach(tl, tlist)
1164                 {
1165                         TargetEntry *tle = (TargetEntry *) lfirst(tl);
1166                         Resdom     *resnode = tle->resdom;
1167
1168                         if (!resnode->resjunk)
1169                         {
1170                                 if (++targetlist_pos == target_pos)
1171                                         return tle; /* return the unique match */
1172                         }
1173                 }
1174                 ereport(ERROR,
1175                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1176                 /* translator: %s is name of a SQL construct, eg ORDER BY */
1177                                  errmsg("%s position %d is not in select list",
1178                                                 clauseText[clause], target_pos)));
1179         }
1180
1181         /*
1182          * Otherwise, we have an expression (this is a Postgres extension not
1183          * found in SQL92).  Convert the untransformed node to a transformed
1184          * expression, and search for a match in the tlist. NOTE: it doesn't
1185          * really matter whether there is more than one match.  Also, we are
1186          * willing to match a resjunk target here, though the above cases must
1187          * ignore resjunk targets.
1188          */
1189         expr = transformExpr(pstate, node);
1190
1191         foreach(tl, tlist)
1192         {
1193                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1194
1195                 if (equal(expr, tle->expr))
1196                         return tle;
1197         }
1198
1199         /*
1200          * If no matches, construct a new target entry which is appended to
1201          * the end of the target list.  This target is given resjunk = TRUE so
1202          * that it will not be projected into the final tuple.
1203          */
1204         target_result = transformTargetEntry(pstate, node, expr, NULL, true);
1205         lappend(tlist, target_result);
1206
1207         return target_result;
1208 }
1209
1210
1211 /*
1212  * transformGroupClause -
1213  *        transform a GROUP BY clause
1214  */
1215 List *
1216 transformGroupClause(ParseState *pstate, List *grouplist,
1217                                          List *targetlist, List *sortClause)
1218 {
1219         List       *glist = NIL,
1220                            *gl;
1221
1222         foreach(gl, grouplist)
1223         {
1224                 TargetEntry *tle;
1225                 Oid                     restype;
1226                 Oid                     ordering_op;
1227                 GroupClause *grpcl;
1228
1229                 tle = findTargetlistEntry(pstate, lfirst(gl),
1230                                                                   targetlist, GROUP_CLAUSE);
1231
1232                 /* avoid making duplicate grouplist entries */
1233                 if (targetIsInSortList(tle, glist))
1234                         continue;
1235
1236                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1237                 restype = tle->resdom->restype;
1238
1239                 if (restype == UNKNOWNOID)
1240                 {
1241                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1242                                                                                          restype, TEXTOID,
1243                                                                                          COERCION_IMPLICIT,
1244                                                                                          COERCE_IMPLICIT_CAST);
1245                         restype = tle->resdom->restype = TEXTOID;
1246                         tle->resdom->restypmod = -1;
1247                 }
1248
1249                 /*
1250                  * If the GROUP BY clause matches the ORDER BY clause, we want to
1251                  * adopt the ordering operators from the latter rather than using
1252                  * the default ops.  This allows "GROUP BY foo ORDER BY foo DESC"
1253                  * to be done with only one sort step.  Note we are assuming that
1254                  * any user-supplied ordering operator will bring equal values
1255                  * together, which is all that GROUP BY needs.
1256                  */
1257                 if (sortClause &&
1258                         ((SortClause *) lfirst(sortClause))->tleSortGroupRef ==
1259                         tle->resdom->ressortgroupref)
1260                 {
1261                         ordering_op = ((SortClause *) lfirst(sortClause))->sortop;
1262                         sortClause = lnext(sortClause);
1263                 }
1264                 else
1265                 {
1266                         ordering_op = ordering_oper_opid(restype);
1267                         sortClause = NIL;       /* disregard ORDER BY once match fails */
1268                 }
1269
1270                 grpcl = makeNode(GroupClause);
1271                 grpcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1272                 grpcl->sortop = ordering_op;
1273                 glist = lappend(glist, grpcl);
1274         }
1275
1276         return glist;
1277 }
1278
1279 /*
1280  * transformSortClause -
1281  *        transform an ORDER BY clause
1282  */
1283 List *
1284 transformSortClause(ParseState *pstate,
1285                                         List *orderlist,
1286                                         List *targetlist,
1287                                         bool resolveUnknown)
1288 {
1289         List       *sortlist = NIL;
1290         List       *olitem;
1291
1292         foreach(olitem, orderlist)
1293         {
1294                 SortBy     *sortby = lfirst(olitem);
1295                 TargetEntry *tle;
1296
1297                 tle = findTargetlistEntry(pstate, sortby->node,
1298                                                                   targetlist, ORDER_CLAUSE);
1299
1300                 sortlist = addTargetToSortList(pstate, tle,
1301                                                                            sortlist, targetlist,
1302                                                                            sortby->sortby_kind,
1303                                                                            sortby->useOp,
1304                                                                            resolveUnknown);
1305         }
1306
1307         return sortlist;
1308 }
1309
1310 /*
1311  * transformDistinctClause -
1312  *        transform a DISTINCT or DISTINCT ON clause
1313  *
1314  * Since we may need to add items to the query's sortClause list, that list
1315  * is passed by reference.      We might also need to add items to the query's
1316  * targetlist, but we assume that cannot be empty initially, so we can
1317  * lappend to it even though the pointer is passed by value.
1318  */
1319 List *
1320 transformDistinctClause(ParseState *pstate, List *distinctlist,
1321                                                 List *targetlist, List **sortClause)
1322 {
1323         List       *result = NIL;
1324         List       *slitem;
1325         List       *dlitem;
1326
1327         /* No work if there was no DISTINCT clause */
1328         if (distinctlist == NIL)
1329                 return NIL;
1330
1331         if (lfirst(distinctlist) == NIL)
1332         {
1333                 /* We had SELECT DISTINCT */
1334
1335                 /*
1336                  * All non-resjunk elements from target list that are not already
1337                  * in the sort list should be added to it.      (We don't really care
1338                  * what order the DISTINCT fields are checked in, so we can leave
1339                  * the user's ORDER BY spec alone, and just add additional sort
1340                  * keys to it to ensure that all targetlist items get sorted.)
1341                  */
1342                 *sortClause = addAllTargetsToSortList(pstate,
1343                                                                                           *sortClause,
1344                                                                                           targetlist,
1345                                                                                           true);
1346
1347                 /*
1348                  * Now, DISTINCT list consists of all non-resjunk sortlist items.
1349                  * Actually, all the sortlist items had better be non-resjunk!
1350                  * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item
1351                  * that does not appear anywhere in the SELECT targetlist, and we
1352                  * can't implement that with only one sorting pass...
1353                  */
1354                 foreach(slitem, *sortClause)
1355                 {
1356                         SortClause *scl = (SortClause *) lfirst(slitem);
1357                         TargetEntry *tle = get_sortgroupclause_tle(scl, targetlist);
1358
1359                         if (tle->resdom->resjunk)
1360                                 ereport(ERROR,
1361                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1362                                                  errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list")));
1363                         else
1364                                 result = lappend(result, copyObject(scl));
1365                 }
1366         }
1367         else
1368         {
1369                 /* We had SELECT DISTINCT ON (expr, ...) */
1370
1371                 /*
1372                  * If the user writes both DISTINCT ON and ORDER BY, then the two
1373                  * expression lists must match (until one or the other runs out).
1374                  * Otherwise the ORDER BY requires a different sort order than the
1375                  * DISTINCT does, and we can't implement that with only one sort
1376                  * pass (and if we do two passes, the results will be rather
1377                  * unpredictable). However, it's OK to have more DISTINCT ON
1378                  * expressions than ORDER BY expressions; we can just add the
1379                  * extra DISTINCT values to the sort list, much as we did above
1380                  * for ordinary DISTINCT fields.
1381                  *
1382                  * Actually, it'd be OK for the common prefixes of the two lists to
1383                  * match in any order, but implementing that check seems like more
1384                  * trouble than it's worth.
1385                  */
1386                 List       *nextsortlist = *sortClause;
1387
1388                 foreach(dlitem, distinctlist)
1389                 {
1390                         TargetEntry *tle;
1391
1392                         tle = findTargetlistEntry(pstate, lfirst(dlitem),
1393                                                                           targetlist, DISTINCT_ON_CLAUSE);
1394
1395                         if (nextsortlist != NIL)
1396                         {
1397                                 SortClause *scl = (SortClause *) lfirst(nextsortlist);
1398
1399                                 if (tle->resdom->ressortgroupref != scl->tleSortGroupRef)
1400                                         ereport(ERROR,
1401                                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1402                                                          errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
1403                                 result = lappend(result, copyObject(scl));
1404                                 nextsortlist = lnext(nextsortlist);
1405                         }
1406                         else
1407                         {
1408                                 *sortClause = addTargetToSortList(pstate, tle,
1409                                                                                                   *sortClause, targetlist,
1410                                                                                                   SORTBY_ASC, NIL, true);
1411
1412                                 /*
1413                                  * Probably, the tle should always have been added at the
1414                                  * end of the sort list ... but search to be safe.
1415                                  */
1416                                 foreach(slitem, *sortClause)
1417                                 {
1418                                         SortClause *scl = (SortClause *) lfirst(slitem);
1419
1420                                         if (tle->resdom->ressortgroupref == scl->tleSortGroupRef)
1421                                         {
1422                                                 result = lappend(result, copyObject(scl));
1423                                                 break;
1424                                         }
1425                                 }
1426                                 if (slitem == NIL)              /* should not happen */
1427                                         elog(ERROR, "failed to add DISTINCT ON clause to target list");
1428                         }
1429                 }
1430         }
1431
1432         return result;
1433 }
1434
1435 /*
1436  * addAllTargetsToSortList
1437  *              Make sure all non-resjunk targets in the targetlist are in the
1438  *              ORDER BY list, adding the not-yet-sorted ones to the end of the list.
1439  *              This is typically used to help implement SELECT DISTINCT.
1440  *
1441  * See addTargetToSortList for info about pstate and resolveUnknown inputs.
1442  *
1443  * Returns the updated ORDER BY list.
1444  */
1445 List *
1446 addAllTargetsToSortList(ParseState *pstate, List *sortlist,
1447                                                 List *targetlist, bool resolveUnknown)
1448 {
1449         List       *i;
1450
1451         foreach(i, targetlist)
1452         {
1453                 TargetEntry *tle = (TargetEntry *) lfirst(i);
1454
1455                 if (!tle->resdom->resjunk)
1456                         sortlist = addTargetToSortList(pstate, tle,
1457                                                                                    sortlist, targetlist,
1458                                                                                    SORTBY_ASC, NIL,
1459                                                                                    resolveUnknown);
1460         }
1461         return sortlist;
1462 }
1463
1464 /*
1465  * addTargetToSortList
1466  *              If the given targetlist entry isn't already in the ORDER BY list,
1467  *              add it to the end of the list, using the sortop with given name
1468  *              or the default sort operator if opname == NIL.
1469  *
1470  * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT.  If not,
1471  * do nothing (which implies the search for a sort operator will fail).
1472  * pstate should be provided if resolveUnknown is TRUE, but can be NULL
1473  * otherwise.
1474  *
1475  * Returns the updated ORDER BY list.
1476  */
1477 List *
1478 addTargetToSortList(ParseState *pstate, TargetEntry *tle,
1479                                         List *sortlist, List *targetlist,
1480                                         int sortby_kind, List *sortby_opname,
1481                                         bool resolveUnknown)
1482 {
1483         /* avoid making duplicate sortlist entries */
1484         if (!targetIsInSortList(tle, sortlist))
1485         {
1486                 SortClause *sortcl = makeNode(SortClause);
1487                 Oid                     restype = tle->resdom->restype;
1488
1489                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1490                 if (restype == UNKNOWNOID && resolveUnknown)
1491                 {
1492                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1493                                                                                          restype, TEXTOID,
1494                                                                                          COERCION_IMPLICIT,
1495                                                                                          COERCE_IMPLICIT_CAST);
1496                         restype = tle->resdom->restype = TEXTOID;
1497                         tle->resdom->restypmod = -1;
1498                 }
1499
1500                 sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1501
1502                 switch (sortby_kind)
1503                 {
1504                         case SORTBY_ASC:
1505                                 sortcl->sortop = ordering_oper_opid(restype);
1506                                 break;
1507                         case SORTBY_DESC:
1508                                 sortcl->sortop = reverse_ordering_oper_opid(restype);
1509                                 break;
1510                         case SORTBY_USING:
1511                                 Assert(sortby_opname != NIL);
1512                                 sortcl->sortop = compatible_oper_opid(sortby_opname,
1513                                                                                                           restype,
1514                                                                                                           restype,
1515                                                                                                           false);
1516                                 break;
1517                         default:
1518                                 elog(ERROR, "unrecognized sortby_kind: %d", sortby_kind);
1519                                 break;
1520                 }
1521
1522                 sortlist = lappend(sortlist, sortcl);
1523         }
1524         return sortlist;
1525 }
1526
1527 /*
1528  * assignSortGroupRef
1529  *        Assign the targetentry an unused ressortgroupref, if it doesn't
1530  *        already have one.  Return the assigned or pre-existing refnumber.
1531  *
1532  * 'tlist' is the targetlist containing (or to contain) the given targetentry.
1533  */
1534 Index
1535 assignSortGroupRef(TargetEntry *tle, List *tlist)
1536 {
1537         Index           maxRef;
1538         List       *l;
1539
1540         if (tle->resdom->ressortgroupref)       /* already has one? */
1541                 return tle->resdom->ressortgroupref;
1542
1543         /* easiest way to pick an unused refnumber: max used + 1 */
1544         maxRef = 0;
1545         foreach(l, tlist)
1546         {
1547                 Index           ref = ((TargetEntry *) lfirst(l))->resdom->ressortgroupref;
1548
1549                 if (ref > maxRef)
1550                         maxRef = ref;
1551         }
1552         tle->resdom->ressortgroupref = maxRef + 1;
1553         return tle->resdom->ressortgroupref;
1554 }
1555
1556 /*
1557  * targetIsInSortList
1558  *              Is the given target item already in the sortlist?
1559  *
1560  * Works for both SortClause and GroupClause lists.  Note that the main
1561  * reason we need this routine (and not just a quick test for nonzeroness
1562  * of ressortgroupref) is that a TLE might be in only one of the lists.
1563  */
1564 bool
1565 targetIsInSortList(TargetEntry *tle, List *sortList)
1566 {
1567         Index           ref = tle->resdom->ressortgroupref;
1568         List       *i;
1569
1570         /* no need to scan list if tle has no marker */
1571         if (ref == 0)
1572                 return false;
1573
1574         foreach(i, sortList)
1575         {
1576                 SortClause *scl = (SortClause *) lfirst(i);
1577
1578                 if (scl->tleSortGroupRef == ref)
1579                         return true;
1580         }
1581         return false;
1582 }