granicus.if.org Git - postgresql/blob - src/backend/parser/parse_clause.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * parse_clause.c
   4  *        handle clauses in parser
   5  *
   6  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.146 2006/03/05 15:58:33 momjian Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include "access/heapam.h"
  19 #include "catalog/heap.h"
  20 #include "nodes/makefuncs.h"
  21 #include "optimizer/clauses.h"
  22 #include "optimizer/tlist.h"
  23 #include "optimizer/var.h"
  24 #include "parser/analyze.h"
  25 #include "parser/parsetree.h"
  26 #include "parser/parse_clause.h"
  27 #include "parser/parse_coerce.h"
  28 #include "parser/parse_expr.h"
  29 #include "parser/parse_oper.h"
  30 #include "parser/parse_relation.h"
  31 #include "parser/parse_target.h"
  32 #include "parser/parse_type.h"
  33 #include "rewrite/rewriteManip.h"
  34 #include "utils/builtins.h"
  35 #include "utils/guc.h"
  36
  37
/*
 * Clause-kind codes.  Their numeric values match the positions of the
 * corresponding keyword strings in clauseText[] below, so the two must be
 * kept in the same order.
 */
#define ORDER_CLAUSE 0
#define GROUP_CLAUSE 1
#define DISTINCT_ON_CLAUSE 2

/*
 * SQL keyword text for each clause kind, listed in clause-code order.
 * NOTE(review): presumably indexed by the codes above when building
 * messages; the use is not visible in this part of the file -- confirm.
 */
static char *clauseText[] = {"ORDER BY", "GROUP BY", "DISTINCT ON"};

/*
 * Forward declarations of the file-local (static) helper routines.
 * NOTE(review): the "clause" argument of findTargetlistEntry looks like one
 * of the clause-kind codes defined above -- confirm against its definition.
 */
static void extractRemainingColumns(List *common_colnames,
                                                List *src_colnames, List *src_colvars,
                                                List **res_colnames, List **res_colvars);
static Node *transformJoinUsingClause(ParseState *pstate,
                                                 List *leftVars, List *rightVars);
static Node *transformJoinOnClause(ParseState *pstate, JoinExpr *j,
                                          RangeTblEntry *l_rte,
                                          RangeTblEntry *r_rte,
                                          List *relnamespace,
                                          Relids containedRels);
static RangeTblEntry *transformTableEntry(ParseState *pstate, RangeVar *r);
static RangeTblEntry *transformRangeSubselect(ParseState *pstate,
                                                RangeSubselect *r);
static RangeTblEntry *transformRangeFunction(ParseState *pstate,
                                           RangeFunction *r);
static Node *transformFromClauseItem(ParseState *pstate, Node *n,
                                                RangeTblEntry **top_rte, int *top_rti,
                                                List **relnamespace,
                                                Relids *containedRels);
static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
                                   Var *l_colvar, Var *r_colvar);
static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
                                        List **tlist, int clause);
  67
  68
  69 /*
  70  * transformFromClause -
  71  *        Process the FROM clause and add items to the query's range table,
  72  *        joinlist, and namespaces.
  73  *
  74  * Note: we assume that pstate's p_rtable, p_joinlist, p_relnamespace, and
  75  * p_varnamespace lists were initialized to NIL when the pstate was created.
  76  * We will add onto any entries already present --- this is needed for rule
  77  * processing, as well as for UPDATE and DELETE.
  78  *
  79  * The range table may grow still further when we transform the expressions
  80  * in the query's quals and target list. (This is possible because in
  81  * POSTQUEL, we allowed references to relations not specified in the
  82  * from-clause.  PostgreSQL keeps this extension to standard SQL.)
  83  */
  84 void
  85 transformFromClause(ParseState *pstate, List *frmList)
  86 {
  87         ListCell   *fl;
  88
  89         /*
  90          * The grammar will have produced a list of RangeVars, RangeSubselects,
  91          * RangeFunctions, and/or JoinExprs. Transform each one (possibly adding
  92          * entries to the rtable), check for duplicate refnames, and then add it
  93          * to the joinlist and namespaces.
  94          */
  95         foreach(fl, frmList)
  96         {
  97                 Node       *n = lfirst(fl);
  98                 RangeTblEntry *rte;
  99                 int                     rtindex;
 100                 List       *relnamespace;
 101                 Relids          containedRels;
 102
 103                 n = transformFromClauseItem(pstate, n,
 104                                                                         &rte,
 105                                                                         &rtindex,
 106                                                                         &relnamespace,
 107                                                                         &containedRels);
 108                 checkNameSpaceConflicts(pstate, pstate->p_relnamespace, relnamespace);
 109                 pstate->p_joinlist = lappend(pstate->p_joinlist, n);
 110                 pstate->p_relnamespace = list_concat(pstate->p_relnamespace,
 111                                                                                          relnamespace);
 112                 pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte);
 113                 bms_free(containedRels);
 114         }
 115 }
 116
 117 /*
 118  * setTargetTable
 119  *        Add the target relation of INSERT/UPDATE/DELETE to the range table,
 120  *        and make the special links to it in the ParseState.
 121  *
 122  *        We also open the target relation and acquire a write lock on it.
 123  *        This must be done before processing the FROM list, in case the target
 124  *        is also mentioned as a source relation --- we want to be sure to grab
 125  *        the write lock before any read lock.
 126  *
 127  *        If alsoSource is true, add the target to the query's joinlist and
 128  *        namespace.  For INSERT, we don't want the target to be joined to;
 129  *        it's a destination of tuples, not a source.   For UPDATE/DELETE,
 130  *        we do need to scan or join the target.  (NOTE: we do not bother
 131  *        to check for namespace conflict; we assume that the namespace was
 132  *        initially empty in these cases.)
 133  *
 134  *        Finally, we mark the relation as requiring the permissions specified
 135  *        by requiredPerms.
 136  *
 137  *        Returns the rangetable index of the target relation.
 138  */
 139 int
 140 setTargetTable(ParseState *pstate, RangeVar *relation,
 141                            bool inh, bool alsoSource, AclMode requiredPerms)
 142 {
 143         RangeTblEntry *rte;
 144         int                     rtindex;
 145
 146         /* Close old target; this could only happen for multi-action rules */
 147         if (pstate->p_target_relation != NULL)
 148                 heap_close(pstate->p_target_relation, NoLock);
 149
 150         /*
 151          * Open target rel and grab suitable lock (which we will hold till end of
 152          * transaction).
 153          *
 154          * analyze.c will eventually do the corresponding heap_close(), but *not*
 155          * release the lock.
 156          */
 157         pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
 158
 159         /*
 160          * Now build an RTE.
 161          */
 162         rte = addRangeTableEntryForRelation(pstate, pstate->p_target_relation,
 163                                                                                 relation->alias, inh, false);
 164         pstate->p_target_rangetblentry = rte;
 165
 166         /* assume new rte is at end */
 167         rtindex = list_length(pstate->p_rtable);
 168         Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 169
 170         /*
 171          * Override addRangeTableEntry's default ACL_SELECT permissions check, and
 172          * instead mark target table as requiring exactly the specified
 173          * permissions.
 174          *
 175          * If we find an explicit reference to the rel later during parse
 176          * analysis, scanRTEForColumn will add the ACL_SELECT bit back again. That
 177          * can't happen for INSERT but it is possible for UPDATE and DELETE.
 178          */
 179         rte->requiredPerms = requiredPerms;
 180
 181         /*
 182          * If UPDATE/DELETE, add table to joinlist and namespaces.
 183          */
 184         if (alsoSource)
 185                 addRTEtoQuery(pstate, rte, true, true, true);
 186
 187         return rtindex;
 188 }
 189
 190 /*
 191  * Simplify InhOption (yes/no/default) into boolean yes/no.
 192  *
 193  * The reason we do things this way is that we don't want to examine the
 194  * SQL_inheritance option flag until parse_analyze is run.      Otherwise,
 195  * we'd do the wrong thing with query strings that intermix SET commands
 196  * with queries.
 197  */
 198 bool
 199 interpretInhOption(InhOption inhOpt)
 200 {
 201         switch (inhOpt)
 202         {
 203                 case INH_NO:
 204                         return false;
 205                 case INH_YES:
 206                         return true;
 207                 case INH_DEFAULT:
 208                         return SQL_inheritance;
 209         }
 210         elog(ERROR, "bogus InhOption value: %d", inhOpt);
 211         return false;                           /* keep compiler quiet */
 212 }
 213
 214 /*
 215  * Given an enum that indicates whether WITH / WITHOUT OIDS was
 216  * specified by the user, return true iff the specified table/result
 217  * set should be created with OIDs. This needs to be done after
 218  * parsing the query string because the return value can depend upon
 219  * the default_with_oids GUC var.
 220  */
 221 bool
 222 interpretOidsOption(ContainsOids opt)
 223 {
 224         switch (opt)
 225         {
 226                 case MUST_HAVE_OIDS:
 227                         return true;
 228
 229                 case MUST_NOT_HAVE_OIDS:
 230                         return false;
 231
 232                 case DEFAULT_OIDS:
 233                         return default_with_oids;
 234         }
 235
 236         elog(ERROR, "bogus ContainsOids value: %d", opt);
 237         return false;                           /* keep compiler quiet */
 238 }
 239
 240 /*
 241  * Extract all not-in-common columns from column lists of a source table
 242  */
 243 static void
 244 extractRemainingColumns(List *common_colnames,
 245                                                 List *src_colnames, List *src_colvars,
 246                                                 List **res_colnames, List **res_colvars)
 247 {
 248         List       *new_colnames = NIL;
 249         List       *new_colvars = NIL;
 250         ListCell   *lnames,
 251                            *lvars;
 252
 253         Assert(list_length(src_colnames) == list_length(src_colvars));
 254
 255         forboth(lnames, src_colnames, lvars, src_colvars)
 256         {
 257                 char       *colname = strVal(lfirst(lnames));
 258                 bool            match = false;
 259                 ListCell   *cnames;
 260
 261                 foreach(cnames, common_colnames)
 262                 {
 263                         char       *ccolname = strVal(lfirst(cnames));
 264
 265                         if (strcmp(colname, ccolname) == 0)
 266                         {
 267                                 match = true;
 268                                 break;
 269                         }
 270                 }
 271
 272                 if (!match)
 273                 {
 274                         new_colnames = lappend(new_colnames, lfirst(lnames));
 275                         new_colvars = lappend(new_colvars, lfirst(lvars));
 276                 }
 277         }
 278
 279         *res_colnames = new_colnames;
 280         *res_colvars = new_colvars;
 281 }
 282
 283 /* transformJoinUsingClause()
 284  *        Build a complete ON clause from a partially-transformed USING list.
 285  *        We are given lists of nodes representing left and right match columns.
 286  *        Result is a transformed qualification expression.
 287  */
 288 static Node *
 289 transformJoinUsingClause(ParseState *pstate, List *leftVars, List *rightVars)
 290 {
 291         Node       *result = NULL;
 292         ListCell   *lvars,
 293                            *rvars;
 294
 295         /*
 296          * We cheat a little bit here by building an untransformed operator tree
 297          * whose leaves are the already-transformed Vars.  This is OK because
 298          * transformExpr() won't complain about already-transformed subnodes.
 299          */
 300         forboth(lvars, leftVars, rvars, rightVars)
 301         {
 302                 Node       *lvar = (Node *) lfirst(lvars);
 303                 Node       *rvar = (Node *) lfirst(rvars);
 304                 A_Expr     *e;
 305
 306                 e = makeSimpleA_Expr(AEXPR_OP, "=", copyObject(lvar), copyObject(rvar));
 307
 308                 if (result == NULL)
 309                         result = (Node *) e;
 310                 else
 311                 {
 312                         A_Expr     *a;
 313
 314                         a = makeA_Expr(AEXPR_AND, NIL, result, (Node *) e);
 315                         result = (Node *) a;
 316                 }
 317         }
 318
 319         /*
 320          * Since the references are already Vars, and are certainly from the input
 321          * relations, we don't have to go through the same pushups that
 322          * transformJoinOnClause() does.  Just invoke transformExpr() to fix up
 323          * the operators, and we're done.
 324          */
 325         result = transformExpr(pstate, result);
 326
 327         result = coerce_to_boolean(pstate, result, "JOIN/USING");
 328
 329         return result;
 330 }
 331
 332 /* transformJoinOnClause()
 333  *        Transform the qual conditions for JOIN/ON.
 334  *        Result is a transformed qualification expression.
 335  */
 336 static Node *
 337 transformJoinOnClause(ParseState *pstate, JoinExpr *j,
 338                                           RangeTblEntry *l_rte,
 339                                           RangeTblEntry *r_rte,
 340                                           List *relnamespace,
 341                                           Relids containedRels)
 342 {
 343         Node       *result;
 344         List       *save_relnamespace;
 345         List       *save_varnamespace;
 346         Relids          clause_varnos;
 347         int                     varno;
 348
 349         /*
 350          * This is a tad tricky, for two reasons.  First, the namespace that the
 351          * join expression should see is just the two subtrees of the JOIN plus
 352          * any outer references from upper pstate levels.  So, temporarily set
 353          * this pstate's namespace accordingly.  (We need not check for refname
 354          * conflicts, because transformFromClauseItem() already did.) NOTE: this
 355          * code is OK only because the ON clause can't legally alter the namespace
 356          * by causing implicit relation refs to be added.
 357          */
 358         save_relnamespace = pstate->p_relnamespace;
 359         save_varnamespace = pstate->p_varnamespace;
 360
 361         pstate->p_relnamespace = relnamespace;
 362         pstate->p_varnamespace = list_make2(l_rte, r_rte);
 363
 364         result = transformWhereClause(pstate, j->quals, "JOIN/ON");
 365
 366         pstate->p_relnamespace = save_relnamespace;
 367         pstate->p_varnamespace = save_varnamespace;
 368
 369         /*
 370          * Second, we need to check that the ON condition doesn't refer to any
 371          * rels outside the input subtrees of the JOIN.  It could do that despite
 372          * our hack on the namespace if it uses fully-qualified names. So, grovel
 373          * through the transformed clause and make sure there are no bogus
 374          * references.  (Outer references are OK, and are ignored here.)
 375          */
 376         clause_varnos = pull_varnos(result);
 377         clause_varnos = bms_del_members(clause_varnos, containedRels);
 378         if ((varno = bms_first_member(clause_varnos)) >= 0)
 379         {
 380                 ereport(ERROR,
 381                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 382                  errmsg("JOIN/ON clause refers to \"%s\", which is not part of JOIN",
 383                                 rt_fetch(varno, pstate->p_rtable)->eref->aliasname)));
 384         }
 385         bms_free(clause_varnos);
 386
 387         return result;
 388 }
 389
 390 /*
 391  * transformTableEntry --- transform a RangeVar (simple relation reference)
 392  */
 393 static RangeTblEntry *
 394 transformTableEntry(ParseState *pstate, RangeVar *r)
 395 {
 396         RangeTblEntry *rte;
 397
 398         /*
 399          * mark this entry to indicate it comes from the FROM clause. In SQL, the
 400          * target list can only refer to range variables specified in the from
 401          * clause but we follow the more powerful POSTQUEL semantics and
 402          * automatically generate the range variable if not specified. However
 403          * there are times we need to know whether the entries are legitimate.
 404          */
 405         rte = addRangeTableEntry(pstate, r, r->alias,
 406                                                          interpretInhOption(r->inhOpt), true);
 407
 408         return rte;
 409 }
 410
 411
 412 /*
 413  * transformRangeSubselect --- transform a sub-SELECT appearing in FROM
 414  */
 415 static RangeTblEntry *
 416 transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
 417 {
 418         List       *parsetrees;
 419         Query      *query;
 420         RangeTblEntry *rte;
 421
 422         /*
 423          * We require user to supply an alias for a subselect, per SQL92. To relax
 424          * this, we'd have to be prepared to gin up a unique alias for an
 425          * unlabeled subselect.
 426          */
 427         if (r->alias == NULL)
 428                 ereport(ERROR,
 429                                 (errcode(ERRCODE_SYNTAX_ERROR),
 430                                  errmsg("subquery in FROM must have an alias")));
 431
 432         /*
 433          * Analyze and transform the subquery.
 434          */
 435         parsetrees = parse_sub_analyze(r->subquery, pstate);
 436
 437         /*
 438          * Check that we got something reasonable.      Most of these conditions are
 439          * probably impossible given restrictions of the grammar, but check 'em
 440          * anyway.
 441          */
 442         if (list_length(parsetrees) != 1)
 443                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
 444         query = (Query *) linitial(parsetrees);
 445         if (query == NULL || !IsA(query, Query))
 446                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
 447
 448         if (query->commandType != CMD_SELECT)
 449                 elog(ERROR, "expected SELECT query from subquery in FROM");
 450         if (query->resultRelation != 0 || query->into != NULL)
 451                 ereport(ERROR,
 452                                 (errcode(ERRCODE_SYNTAX_ERROR),
 453                                  errmsg("subquery in FROM may not have SELECT INTO")));
 454
 455         /*
 456          * The subquery cannot make use of any variables from FROM items created
 457          * earlier in the current query.  Per SQL92, the scope of a FROM item does
 458          * not include other FROM items.  Formerly we hacked the namespace so that
 459          * the other variables weren't even visible, but it seems more useful to
 460          * leave them visible and give a specific error message.
 461          *
 462          * XXX this will need further work to support SQL99's LATERAL() feature,
 463          * wherein such references would indeed be legal.
 464          *
 465          * We can skip groveling through the subquery if there's not anything
 466          * visible in the current query.  Also note that outer references are OK.
 467          */
 468         if (pstate->p_relnamespace || pstate->p_varnamespace)
 469         {
 470                 if (contain_vars_of_level((Node *) query, 1))
 471                         ereport(ERROR,
 472                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 473                                          errmsg("subquery in FROM may not refer to other relations of same query level")));
 474         }
 475
 476         /*
 477          * OK, build an RTE for the subquery.
 478          */
 479         rte = addRangeTableEntryForSubquery(pstate, query, r->alias, true);
 480
 481         return rte;
 482 }
 483
 484
 485 /*
 486  * transformRangeFunction --- transform a function call appearing in FROM
 487  */
 488 static RangeTblEntry *
 489 transformRangeFunction(ParseState *pstate, RangeFunction *r)
 490 {
 491         Node       *funcexpr;
 492         char       *funcname;
 493         RangeTblEntry *rte;
 494
 495         /*
 496          * Get function name for possible use as alias.  We use the same
 497          * transformation rules as for a SELECT output expression.      For a FuncCall
 498          * node, the result will be the function name, but it is possible for the
 499          * grammar to hand back other node types.
 500          */
 501         funcname = FigureColname(r->funccallnode);
 502
 503         /*
 504          * Transform the raw expression.
 505          */
 506         funcexpr = transformExpr(pstate, r->funccallnode);
 507
 508         /*
 509          * The function parameters cannot make use of any variables from other
 510          * FROM items.  (Compare to transformRangeSubselect(); the coding is
 511          * different though because we didn't parse as a sub-select with its own
 512          * level of namespace.)
 513          *
 514          * XXX this will need further work to support SQL99's LATERAL() feature,
 515          * wherein such references would indeed be legal.
 516          */
 517         if (pstate->p_relnamespace || pstate->p_varnamespace)
 518         {
 519                 if (contain_vars_of_level(funcexpr, 0))
 520                         ereport(ERROR,
 521                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 522                                          errmsg("function expression in FROM may not refer to other relations of same query level")));
 523         }
 524
 525         /*
 526          * Disallow aggregate functions in the expression.      (No reason to postpone
 527          * this check until parseCheckAggregates.)
 528          */
 529         if (pstate->p_hasAggs)
 530         {
 531                 if (checkExprHasAggs(funcexpr))
 532                         ereport(ERROR,
 533                                         (errcode(ERRCODE_GROUPING_ERROR),
 534                                          errmsg("cannot use aggregate function in function expression in FROM")));
 535         }
 536
 537         /*
 538          * If a coldeflist is supplied, ensure it defines a legal set of names (no
 539          * duplicates) and datatypes (no pseudo-types, for instance).
 540          */
 541         if (r->coldeflist)
 542         {
 543                 TupleDesc       tupdesc;
 544
 545                 tupdesc = BuildDescForRelation(r->coldeflist);
 546                 CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE);
 547         }
 548
 549         /*
 550          * OK, build an RTE for the function.
 551          */
 552         rte = addRangeTableEntryForFunction(pstate, funcname, funcexpr,
 553                                                                                 r, true);
 554
 555         return rte;
 556 }
 557
 558
 559 /*
 560  * transformFromClauseItem -
 561  *        Transform a FROM-clause item, adding any required entries to the
 562  *        range table list being built in the ParseState, and return the
 563  *        transformed item ready to include in the joinlist and namespaces.
 564  *        This routine can recurse to handle SQL92 JOIN expressions.
 565  *
 566  * The function return value is the node to add to the jointree (a
 567  * RangeTblRef or JoinExpr).  Additional output parameters are:
 568  *
 569  * *top_rte: receives the RTE corresponding to the jointree item.
 570  * (We could extract this from the function return node, but it saves cycles
 571  * to pass it back separately.)
 572  *
 573  * *top_rti: receives the rangetable index of top_rte.  (Ditto.)
 574  *
 575  * *relnamespace: receives a List of the RTEs exposed as relation names
 576  * by this item.
 577  *
 578  * *containedRels: receives a bitmap set of the rangetable indexes
 579  * of all the base and join relations represented in this jointree item.
 580  * This is needed for checking JOIN/ON conditions in higher levels.
 581  *
 582  * We do not need to pass back an explicit varnamespace value, because
 583  * in all cases the varnamespace contribution is exactly top_rte.
 584  */
 585 static Node *
 586 transformFromClauseItem(ParseState *pstate, Node *n,
 587                                                 RangeTblEntry **top_rte, int *top_rti,
 588                                                 List **relnamespace,
 589                                                 Relids *containedRels)
 590 {
 591         if (IsA(n, RangeVar))
 592         {
 593                 /* Plain relation reference */
 594                 RangeTblRef *rtr;
 595                 RangeTblEntry *rte;
 596                 int                     rtindex;
 597
 598                 rte = transformTableEntry(pstate, (RangeVar *) n);
 599                 /* assume new rte is at end */
 600                 rtindex = list_length(pstate->p_rtable);
 601                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 602                 *top_rte = rte;
 603                 *top_rti = rtindex;
 604                 *relnamespace = list_make1(rte);
 605                 *containedRels = bms_make_singleton(rtindex);
 606                 rtr = makeNode(RangeTblRef);
 607                 rtr->rtindex = rtindex;
 608                 return (Node *) rtr;
 609         }
 610         else if (IsA(n, RangeSubselect))
 611         {
 612                 /* sub-SELECT is like a plain relation */
 613                 RangeTblRef *rtr;
 614                 RangeTblEntry *rte;
 615                 int                     rtindex;
 616
 617                 rte = transformRangeSubselect(pstate, (RangeSubselect *) n);
 618                 /* assume new rte is at end */
 619                 rtindex = list_length(pstate->p_rtable);
 620                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 621                 *top_rte = rte;
 622                 *top_rti = rtindex;
 623                 *relnamespace = list_make1(rte);
 624                 *containedRels = bms_make_singleton(rtindex);
 625                 rtr = makeNode(RangeTblRef);
 626                 rtr->rtindex = rtindex;
 627                 return (Node *) rtr;
 628         }
 629         else if (IsA(n, RangeFunction))
 630         {
 631                 /* function is like a plain relation */
 632                 RangeTblRef *rtr;
 633                 RangeTblEntry *rte;
 634                 int                     rtindex;
 635
 636                 rte = transformRangeFunction(pstate, (RangeFunction *) n);
 637                 /* assume new rte is at end */
 638                 rtindex = list_length(pstate->p_rtable);
 639                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 640                 *top_rte = rte;
 641                 *top_rti = rtindex;
 642                 *relnamespace = list_make1(rte);
 643                 *containedRels = bms_make_singleton(rtindex);
 644                 rtr = makeNode(RangeTblRef);
 645                 rtr->rtindex = rtindex;
 646                 return (Node *) rtr;
 647         }
 648         else if (IsA(n, JoinExpr))
 649         {
 650                 /* A newfangled join expression */
 651                 JoinExpr   *j = (JoinExpr *) n;
 652                 RangeTblEntry *l_rte;
 653                 RangeTblEntry *r_rte;
 654                 int                     l_rtindex;
 655                 int                     r_rtindex;
 656                 Relids          l_containedRels,
 657                                         r_containedRels,
 658                                         my_containedRels;
 659                 List       *l_relnamespace,
 660                                    *r_relnamespace,
 661                                    *my_relnamespace,
 662                                    *l_colnames,
 663                                    *r_colnames,
 664                                    *res_colnames,
 665                                    *l_colvars,
 666                                    *r_colvars,
 667                                    *res_colvars;
 668                 RangeTblEntry *rte;
 669
 670                 /*
 671                  * Recursively process the left and right subtrees
 672                  */
 673                 j->larg = transformFromClauseItem(pstate, j->larg,
 674                                                                                   &l_rte,
 675                                                                                   &l_rtindex,
 676                                                                                   &l_relnamespace,
 677                                                                                   &l_containedRels);
 678                 j->rarg = transformFromClauseItem(pstate, j->rarg,
 679                                                                                   &r_rte,
 680                                                                                   &r_rtindex,
 681                                                                                   &r_relnamespace,
 682                                                                                   &r_containedRels);
 683
 684                 /*
 685                  * Check for conflicting refnames in left and right subtrees. Must do
 686                  * this because higher levels will assume I hand back a self-
 687                  * consistent namespace subtree.
 688                  */
 689                 checkNameSpaceConflicts(pstate, l_relnamespace, r_relnamespace);
 690
 691                 /*
 692                  * Generate combined relation membership info for possible use by
 693                  * transformJoinOnClause below.
 694                  */
 695                 my_relnamespace = list_concat(l_relnamespace, r_relnamespace);
 696                 my_containedRels = bms_join(l_containedRels, r_containedRels);
 697
 698                 pfree(r_relnamespace);  /* free unneeded list header */
 699
 700                 /*
 701                  * Extract column name and var lists from both subtrees
 702                  *
 703                  * Note: expandRTE returns new lists, safe for me to modify
 704                  */
 705                 expandRTE(l_rte, l_rtindex, 0, false,
 706                                   &l_colnames, &l_colvars);
 707                 expandRTE(r_rte, r_rtindex, 0, false,
 708                                   &r_colnames, &r_colvars);
 709
 710                 /*
 711                  * Natural join does not explicitly specify columns; must generate
 712                  * columns to join. Need to run through the list of columns from each
 713                  * table or join result and match up the column names. Use the first
 714                  * table, and check every column in the second table for a match.
 715                  * (We'll check that the matches were unique later on.) The result of
 716                  * this step is a list of column names just like an explicitly-written
 717                  * USING list.
 718                  */
 719                 if (j->isNatural)
 720                 {
 721                         List       *rlist = NIL;
 722                         ListCell   *lx,
 723                                            *rx;
 724
 725                         Assert(j->using == NIL);        /* shouldn't have USING() too */
 726
 727                         foreach(lx, l_colnames)
 728                         {
 729                                 char       *l_colname = strVal(lfirst(lx));
 730                                 Value      *m_name = NULL;
 731
 732                                 foreach(rx, r_colnames)
 733                                 {
 734                                         char       *r_colname = strVal(lfirst(rx));
 735
 736                                         if (strcmp(l_colname, r_colname) == 0)
 737                                         {
 738                                                 m_name = makeString(l_colname);
 739                                                 break;
 740                                         }
 741                                 }
 742
 743                                 /* matched a right column? then keep as join column... */
 744                                 if (m_name != NULL)
 745                                         rlist = lappend(rlist, m_name);
 746                         }
 747
 748                         j->using = rlist;
 749                 }
 750
 751                 /*
 752                  * Now transform the join qualifications, if any.
 753                  */
 754                 res_colnames = NIL;
 755                 res_colvars = NIL;
 756
 757                 if (j->using)
 758                 {
 759                         /*
 760                          * JOIN/USING (or NATURAL JOIN, as transformed above). Transform
 761                          * the list into an explicit ON-condition, and generate a list of
 762                          * merged result columns.
 763                          */
 764                         List       *ucols = j->using;
 765                         List       *l_usingvars = NIL;
 766                         List       *r_usingvars = NIL;
 767                         ListCell   *ucol;
 768
 769                         Assert(j->quals == NULL);       /* shouldn't have ON() too */
 770
 771                         foreach(ucol, ucols)
 772                         {
 773                                 char       *u_colname = strVal(lfirst(ucol));
 774                                 ListCell   *col;
 775                                 int                     ndx;
 776                                 int                     l_index = -1;
 777                                 int                     r_index = -1;
 778                                 Var                *l_colvar,
 779                                                    *r_colvar;
 780
 781                                 /* Check for USING(foo,foo) */
 782                                 foreach(col, res_colnames)
 783                                 {
 784                                         char       *res_colname = strVal(lfirst(col));
 785
 786                                         if (strcmp(res_colname, u_colname) == 0)
 787                                                 ereport(ERROR,
 788                                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
 789                                                                  errmsg("column name \"%s\" appears more than once in USING clause",
 790                                                                                 u_colname)));
 791                                 }
 792
 793                                 /* Find it in left input */
 794                                 ndx = 0;
 795                                 foreach(col, l_colnames)
 796                                 {
 797                                         char       *l_colname = strVal(lfirst(col));
 798
 799                                         if (strcmp(l_colname, u_colname) == 0)
 800                                         {
 801                                                 if (l_index >= 0)
 802                                                         ereport(ERROR,
 803                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
 804                                                                          errmsg("common column name \"%s\" appears more than once in left table",
 805                                                                                         u_colname)));
 806                                                 l_index = ndx;
 807                                         }
 808                                         ndx++;
 809                                 }
 810                                 if (l_index < 0)
 811                                         ereport(ERROR,
 812                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 813                                                          errmsg("column \"%s\" specified in USING clause does not exist in left table",
 814                                                                         u_colname)));
 815
 816                                 /* Find it in right input */
 817                                 ndx = 0;
 818                                 foreach(col, r_colnames)
 819                                 {
 820                                         char       *r_colname = strVal(lfirst(col));
 821
 822                                         if (strcmp(r_colname, u_colname) == 0)
 823                                         {
 824                                                 if (r_index >= 0)
 825                                                         ereport(ERROR,
 826                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
 827                                                                          errmsg("common column name \"%s\" appears more than once in right table",
 828                                                                                         u_colname)));
 829                                                 r_index = ndx;
 830                                         }
 831                                         ndx++;
 832                                 }
 833                                 if (r_index < 0)
 834                                         ereport(ERROR,
 835                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 836                                                          errmsg("column \"%s\" specified in USING clause does not exist in right table",
 837                                                                         u_colname)));
 838
 839                                 l_colvar = list_nth(l_colvars, l_index);
 840                                 l_usingvars = lappend(l_usingvars, l_colvar);
 841                                 r_colvar = list_nth(r_colvars, r_index);
 842                                 r_usingvars = lappend(r_usingvars, r_colvar);
 843
 844                                 res_colnames = lappend(res_colnames, lfirst(ucol));
 845                                 res_colvars = lappend(res_colvars,
 846                                                                           buildMergedJoinVar(pstate,
 847                                                                                                                  j->jointype,
 848                                                                                                                  l_colvar,
 849                                                                                                                  r_colvar));
 850                         }
 851
 852                         j->quals = transformJoinUsingClause(pstate,
 853                                                                                                 l_usingvars,
 854                                                                                                 r_usingvars);
 855                 }
 856                 else if (j->quals)
 857                 {
 858                         /* User-written ON-condition; transform it */
 859                         j->quals = transformJoinOnClause(pstate, j,
 860                                                                                          l_rte, r_rte,
 861                                                                                          my_relnamespace,
 862                                                                                          my_containedRels);
 863                 }
 864                 else
 865                 {
 866                         /* CROSS JOIN: no quals */
 867                 }
 868
 869                 /* Add remaining columns from each side to the output columns */
 870                 extractRemainingColumns(res_colnames,
 871                                                                 l_colnames, l_colvars,
 872                                                                 &l_colnames, &l_colvars);
 873                 extractRemainingColumns(res_colnames,
 874                                                                 r_colnames, r_colvars,
 875                                                                 &r_colnames, &r_colvars);
 876                 res_colnames = list_concat(res_colnames, l_colnames);
 877                 res_colvars = list_concat(res_colvars, l_colvars);
 878                 res_colnames = list_concat(res_colnames, r_colnames);
 879                 res_colvars = list_concat(res_colvars, r_colvars);
 880
 881                 /*
 882                  * Check alias (AS clause), if any.
 883                  */
 884                 if (j->alias)
 885                 {
 886                         if (j->alias->colnames != NIL)
 887                         {
 888                                 if (list_length(j->alias->colnames) > list_length(res_colnames))
 889                                         ereport(ERROR,
 890                                                         (errcode(ERRCODE_SYNTAX_ERROR),
 891                                                          errmsg("column alias list for \"%s\" has too many entries",
 892                                                                         j->alias->aliasname)));
 893                         }
 894                 }
 895
 896                 /*
 897                  * Now build an RTE for the result of the join
 898                  */
 899                 rte = addRangeTableEntryForJoin(pstate,
 900                                                                                 res_colnames,
 901                                                                                 j->jointype,
 902                                                                                 res_colvars,
 903                                                                                 j->alias,
 904                                                                                 true);
 905
 906                 /* assume new rte is at end */
 907                 j->rtindex = list_length(pstate->p_rtable);
 908                 Assert(rte == rt_fetch(j->rtindex, pstate->p_rtable));
 909
 910                 *top_rte = rte;
 911                 *top_rti = j->rtindex;
 912
 913                 /*
 914                  * Prepare returned namespace list.  If the JOIN has an alias then it
 915                  * hides the contained RTEs as far as the relnamespace goes;
 916                  * otherwise, put the contained RTEs and *not* the JOIN into
 917                  * relnamespace.
 918                  */
 919                 if (j->alias)
 920                 {
 921                         *relnamespace = list_make1(rte);
 922                         list_free(my_relnamespace);
 923                 }
 924                 else
 925                         *relnamespace = my_relnamespace;
 926
 927                 /*
 928                  * Include join RTE in returned containedRels set
 929                  */
 930                 *containedRels = bms_add_member(my_containedRels, j->rtindex);
 931
 932                 return (Node *) j;
 933         }
 934         else
 935                 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
 936         return NULL;                            /* can't get here, keep compiler quiet */
 937 }
 938
 939 /*
 940  * buildMergedJoinVar -
 941  *        generate a suitable replacement expression for a merged join column
 942  */
 943 static Node *
 944 buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 945                                    Var *l_colvar, Var *r_colvar)
 946 {
 947         Oid                     outcoltype;
 948         int32           outcoltypmod;
 949         Node       *l_node,
 950                            *r_node,
 951                            *res_node;
 952
 953         /*
 954          * Choose output type if input types are dissimilar.
 955          */
 956         outcoltype = l_colvar->vartype;
 957         outcoltypmod = l_colvar->vartypmod;
 958         if (outcoltype != r_colvar->vartype)
 959         {
 960                 outcoltype = select_common_type(list_make2_oid(l_colvar->vartype,
 961                                                                                                            r_colvar->vartype),
 962                                                                                 "JOIN/USING");
 963                 outcoltypmod = -1;              /* ie, unknown */
 964         }
 965         else if (outcoltypmod != r_colvar->vartypmod)
 966         {
 967                 /* same type, but not same typmod */
 968                 outcoltypmod = -1;              /* ie, unknown */
 969         }
 970
 971         /*
 972          * Insert coercion functions if needed.  Note that a difference in typmod
 973          * can only happen if input has typmod but outcoltypmod is -1. In that
 974          * case we insert a RelabelType to clearly mark that result's typmod is
 975          * not same as input.  We never need coerce_type_typmod.
 976          */
 977         if (l_colvar->vartype != outcoltype)
 978                 l_node = coerce_type(pstate, (Node *) l_colvar, l_colvar->vartype,
 979                                                          outcoltype, outcoltypmod,
 980                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
 981         else if (l_colvar->vartypmod != outcoltypmod)
 982                 l_node = (Node *) makeRelabelType((Expr *) l_colvar,
 983                                                                                   outcoltype, outcoltypmod,
 984                                                                                   COERCE_IMPLICIT_CAST);
 985         else
 986                 l_node = (Node *) l_colvar;
 987
 988         if (r_colvar->vartype != outcoltype)
 989                 r_node = coerce_type(pstate, (Node *) r_colvar, r_colvar->vartype,
 990                                                          outcoltype, outcoltypmod,
 991                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
 992         else if (r_colvar->vartypmod != outcoltypmod)
 993                 r_node = (Node *) makeRelabelType((Expr *) r_colvar,
 994                                                                                   outcoltype, outcoltypmod,
 995                                                                                   COERCE_IMPLICIT_CAST);
 996         else
 997                 r_node = (Node *) r_colvar;
 998
 999         /*
1000          * Choose what to emit
1001          */
1002         switch (jointype)
1003         {
1004                 case JOIN_INNER:
1005
1006                         /*
1007                          * We can use either var; prefer non-coerced one if available.
1008                          */
1009                         if (IsA(l_node, Var))
1010                                 res_node = l_node;
1011                         else if (IsA(r_node, Var))
1012                                 res_node = r_node;
1013                         else
1014                                 res_node = l_node;
1015                         break;
1016                 case JOIN_LEFT:
1017                         /* Always use left var */
1018                         res_node = l_node;
1019                         break;
1020                 case JOIN_RIGHT:
1021                         /* Always use right var */
1022                         res_node = r_node;
1023                         break;
1024                 case JOIN_FULL:
1025                         {
1026                                 /*
1027                                  * Here we must build a COALESCE expression to ensure that the
1028                                  * join output is non-null if either input is.
1029                                  */
1030                                 CoalesceExpr *c = makeNode(CoalesceExpr);
1031
1032                                 c->coalescetype = outcoltype;
1033                                 c->args = list_make2(l_node, r_node);
1034                                 res_node = (Node *) c;
1035                                 break;
1036                         }
1037                 default:
1038                         elog(ERROR, "unrecognized join type: %d", (int) jointype);
1039                         res_node = NULL;        /* keep compiler quiet */
1040                         break;
1041         }
1042
1043         return res_node;
1044 }
1045
1046
1047 /*
1048  * transformWhereClause -
1049  *        Transform the qualification and make sure it is of type boolean.
1050  *        Used for WHERE and allied clauses.
1051  *
1052  * constructName does not affect the semantics, but is used in error messages
1053  */
1054 Node *
1055 transformWhereClause(ParseState *pstate, Node *clause,
1056                                          const char *constructName)
1057 {
1058         Node       *qual;
1059
1060         if (clause == NULL)
1061                 return NULL;
1062
1063         qual = transformExpr(pstate, clause);
1064
1065         qual = coerce_to_boolean(pstate, qual, constructName);
1066
1067         return qual;
1068 }
1069
1070
1071 /*
1072  * transformLimitClause -
1073  *        Transform the expression and make sure it is of type integer.
1074  *        Used for LIMIT and allied clauses.
1075  *
1076  * constructName does not affect the semantics, but is used in error messages
1077  */
1078 Node *
1079 transformLimitClause(ParseState *pstate, Node *clause,
1080                                          const char *constructName)
1081 {
1082         Node       *qual;
1083
1084         if (clause == NULL)
1085                 return NULL;
1086
1087         qual = transformExpr(pstate, clause);
1088
1089         qual = coerce_to_integer(pstate, qual, constructName);
1090
1091         /*
1092          * LIMIT can't refer to any vars or aggregates of the current query; we
1093          * don't allow subselects either (though that case would at least be
1094          * sensible)
1095          */
1096         if (contain_vars_of_level(qual, 0))
1097         {
1098                 ereport(ERROR,
1099                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1100                 /* translator: %s is name of a SQL construct, eg LIMIT */
1101                                  errmsg("argument of %s must not contain variables",
1102                                                 constructName)));
1103         }
1104         if (checkExprHasAggs(qual))
1105         {
1106                 ereport(ERROR,
1107                                 (errcode(ERRCODE_GROUPING_ERROR),
1108                 /* translator: %s is name of a SQL construct, eg LIMIT */
1109                                  errmsg("argument of %s must not contain aggregates",
1110                                                 constructName)));
1111         }
1112         if (contain_subplans(qual))
1113         {
1114                 ereport(ERROR,
1115                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1116                 /* translator: %s is name of a SQL construct, eg LIMIT */
1117                                  errmsg("argument of %s must not contain subqueries",
1118                                                 constructName)));
1119         }
1120
1121         return qual;
1122 }
1123
1124
1125 /*
1126  *      findTargetlistEntry -
1127  *        Returns the targetlist entry matching the given (untransformed) node.
1128  *        If no matching entry exists, one is created and appended to the target
1129  *        list as a "resjunk" node.
1130  *
1131  * node         the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
1132  * tlist        the target list (passed by reference so we can append to it)
1133  * clause       identifies clause type being processed
1134  */
1135 static TargetEntry *
1136 findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
1137 {
1138         TargetEntry *target_result = NULL;
1139         ListCell   *tl;
1140         Node       *expr;
1141
1142         /*----------
1143          * Handle two special cases as mandated by the SQL92 spec:
1144          *
1145          * 1. Bare ColumnName (no qualifier or subscripts)
1146          *        For a bare identifier, we search for a matching column name
1147          *        in the existing target list.  Multiple matches are an error
1148          *        unless they refer to identical values; for example,
1149          *        we allow      SELECT a, a FROM table ORDER BY a
1150          *        but not       SELECT a AS b, b FROM table ORDER BY b
1151          *        If no match is found, we fall through and treat the identifier
1152          *        as an expression.
1153          *        For GROUP BY, it is incorrect to match the grouping item against
1154          *        targetlist entries: according to SQL92, an identifier in GROUP BY
1155          *        is a reference to a column name exposed by FROM, not to a target
1156          *        list column.  However, many implementations (including pre-7.0
1157          *        PostgreSQL) accept this anyway.  So for GROUP BY, we look first
1158          *        to see if the identifier matches any FROM column name, and only
1159          *        try for a targetlist name if it doesn't.  This ensures that we
1160          *        adhere to the spec in the case where the name could be both.
1161          *        DISTINCT ON isn't in the standard, so we can do what we like there;
1162          *        we choose to make it work like ORDER BY, on the rather flimsy
1163          *        grounds that ordinary DISTINCT works on targetlist entries.
1164          *
1165          * 2. IntegerConstant
1166          *        This means to use the n'th item in the existing target list.
1167          *        Note that it would make no sense to order/group/distinct by an
1168          *        actual constant, so this does not create a conflict with our
1169          *        extension to order/group by an expression.
1170          *        GROUP BY column-number is not allowed by SQL92, but since
1171          *        the standard has no other behavior defined for this syntax,
1172          *        we may as well accept this common extension.
1173          *
1174          * Note that pre-existing resjunk targets must not be used in either case,
1175          * since the user didn't write them in his SELECT list.
1176          *
1177          * If neither special case applies, fall through to treat the item as
1178          * an expression.
1179          *----------
1180          */
1181         if (IsA(node, ColumnRef) &&
1182                 list_length(((ColumnRef *) node)->fields) == 1)
1183         {
1184                 char       *name = strVal(linitial(((ColumnRef *) node)->fields));
1185
1186                 if (clause == GROUP_CLAUSE)
1187                 {
1188                         /*
1189                          * In GROUP BY, we must prefer a match against a FROM-clause
1190                          * column to one against the targetlist.  Look to see if there is
1191                          * a matching column.  If so, fall through to let transformExpr()
1192                          * do the rest.  NOTE: if name could refer ambiguously to more
1193                          * than one column name exposed by FROM, colNameToVar will
1194                          * ereport(ERROR).      That's just what we want here.
1195                          *
1196                          * Small tweak for 7.4.3: ignore matches in upper query levels.
1197                          * This effectively changes the search order for bare names to (1)
1198                          * local FROM variables, (2) local targetlist aliases, (3) outer
1199                          * FROM variables, whereas before it was (1) (3) (2). SQL92 and
1200                          * SQL99 do not allow GROUPing BY an outer reference, so this
1201                          * breaks no cases that are legal per spec, and it seems a more
1202                          * self-consistent behavior.
1203                          */
1204                         if (colNameToVar(pstate, name, true) != NULL)
1205                                 name = NULL;
1206                 }
1207
1208                 if (name != NULL)
1209                 {
1210                         foreach(tl, *tlist)
1211                         {
1212                                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1213
1214                                 if (!tle->resjunk &&
1215                                         strcmp(tle->resname, name) == 0)
1216                                 {
1217                                         if (target_result != NULL)
1218                                         {
1219                                                 if (!equal(target_result->expr, tle->expr))
1220                                                         ereport(ERROR,
1221                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
1222
1223                                                         /*
1224                                                          * translator: first %s is name of a SQL
1225                                                          * construct, eg ORDER BY
1226                                                          */
1227                                                                          errmsg("%s \"%s\" is ambiguous",
1228                                                                                         clauseText[clause], name)));
1229                                         }
1230                                         else
1231                                                 target_result = tle;
1232                                         /* Stay in loop to check for ambiguity */
1233                                 }
1234                         }
1235                         if (target_result != NULL)
1236                                 return target_result;   /* return the first match */
1237                 }
1238         }
1239         if (IsA(node, A_Const))
1240         {
1241                 Value      *val = &((A_Const *) node)->val;
1242                 int                     targetlist_pos = 0;
1243                 int                     target_pos;
1244
1245                 if (!IsA(val, Integer))
1246                         ereport(ERROR,
1247                                         (errcode(ERRCODE_SYNTAX_ERROR),
1248                         /* translator: %s is name of a SQL construct, eg ORDER BY */
1249                                          errmsg("non-integer constant in %s",
1250                                                         clauseText[clause])));
1251                 target_pos = intVal(val);
1252                 foreach(tl, *tlist)
1253                 {
1254                         TargetEntry *tle = (TargetEntry *) lfirst(tl);
1255
1256                         if (!tle->resjunk)
1257                         {
1258                                 if (++targetlist_pos == target_pos)
1259                                         return tle; /* return the unique match */
1260                         }
1261                 }
1262                 ereport(ERROR,
1263                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1264                 /* translator: %s is name of a SQL construct, eg ORDER BY */
1265                                  errmsg("%s position %d is not in select list",
1266                                                 clauseText[clause], target_pos)));
1267         }
1268
1269         /*
1270          * Otherwise, we have an expression (this is a Postgres extension not
1271          * found in SQL92).  Convert the untransformed node to a transformed
1272          * expression, and search for a match in the tlist. NOTE: it doesn't
1273          * really matter whether there is more than one match.  Also, we are
1274          * willing to match a resjunk target here, though the above cases must
1275          * ignore resjunk targets.
1276          */
1277         expr = transformExpr(pstate, node);
1278
1279         foreach(tl, *tlist)
1280         {
1281                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1282
1283                 if (equal(expr, tle->expr))
1284                         return tle;
1285         }
1286
1287         /*
1288          * If no matches, construct a new target entry which is appended to the
1289          * end of the target list.      This target is given resjunk = TRUE so that it
1290          * will not be projected into the final tuple.
1291          */
1292         target_result = transformTargetEntry(pstate, node, expr, NULL, true);
1293
1294         *tlist = lappend(*tlist, target_result);
1295
1296         return target_result;
1297 }
1298
1299
1300 /*
1301  * transformGroupClause -
1302  *        transform a GROUP BY clause
1303  *
1304  * GROUP BY items will be added to the targetlist (as resjunk columns)
1305  * if not already present, so the targetlist must be passed by reference.
1306  */
1307 List *
1308 transformGroupClause(ParseState *pstate, List *grouplist,
1309                                          List **targetlist, List *sortClause)
1310 {
1311         List       *glist = NIL;
1312         ListCell   *gl;
1313         ListCell   *sortItem;
1314
1315         sortItem = list_head(sortClause);
1316
1317         foreach(gl, grouplist)
1318         {
1319                 TargetEntry *tle;
1320                 Oid                     restype;
1321                 Oid                     ordering_op;
1322                 GroupClause *grpcl;
1323
1324                 tle = findTargetlistEntry(pstate, lfirst(gl),
1325                                                                   targetlist, GROUP_CLAUSE);
1326
1327                 /* avoid making duplicate grouplist entries */
1328                 if (targetIsInSortList(tle, glist))
1329                         continue;
1330
1331                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1332                 restype = exprType((Node *) tle->expr);
1333
1334                 if (restype == UNKNOWNOID)
1335                 {
1336                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1337                                                                                          restype, TEXTOID, -1,
1338                                                                                          COERCION_IMPLICIT,
1339                                                                                          COERCE_IMPLICIT_CAST);
1340                         restype = TEXTOID;
1341                 }
1342
1343                 /*
1344                  * If the GROUP BY clause matches the ORDER BY clause, we want to
1345                  * adopt the ordering operators from the latter rather than using the
1346                  * default ops.  This allows "GROUP BY foo ORDER BY foo DESC" to be
1347                  * done with only one sort step.  Note we are assuming that any
1348                  * user-supplied ordering operator will bring equal values together,
1349                  * which is all that GROUP BY needs.
1350                  */
1351                 if (sortItem &&
1352                         ((SortClause *) lfirst(sortItem))->tleSortGroupRef ==
1353                         tle->ressortgroupref)
1354                 {
1355                         ordering_op = ((SortClause *) lfirst(sortItem))->sortop;
1356                         sortItem = lnext(sortItem);
1357                 }
1358                 else
1359                 {
1360                         ordering_op = ordering_oper_opid(restype);
1361                         sortItem = NULL;        /* disregard ORDER BY once match fails */
1362                 }
1363
1364                 grpcl = makeNode(GroupClause);
1365                 grpcl->tleSortGroupRef = assignSortGroupRef(tle, *targetlist);
1366                 grpcl->sortop = ordering_op;
1367                 glist = lappend(glist, grpcl);
1368         }
1369
1370         return glist;
1371 }
1372
1373 /*
1374  * transformSortClause -
1375  *        transform an ORDER BY clause
1376  *
1377  * ORDER BY items will be added to the targetlist (as resjunk columns)
1378  * if not already present, so the targetlist must be passed by reference.
1379  */
1380 List *
1381 transformSortClause(ParseState *pstate,
1382                                         List *orderlist,
1383                                         List **targetlist,
1384                                         bool resolveUnknown)
1385 {
1386         List       *sortlist = NIL;
1387         ListCell   *olitem;
1388
1389         foreach(olitem, orderlist)
1390         {
1391                 SortBy     *sortby = lfirst(olitem);
1392                 TargetEntry *tle;
1393
1394                 tle = findTargetlistEntry(pstate, sortby->node,
1395                                                                   targetlist, ORDER_CLAUSE);
1396
1397                 sortlist = addTargetToSortList(pstate, tle,
1398                                                                            sortlist, *targetlist,
1399                                                                            sortby->sortby_kind,
1400                                                                            sortby->useOp,
1401                                                                            resolveUnknown);
1402         }
1403
1404         return sortlist;
1405 }
1406
1407 /*
1408  * transformDistinctClause -
1409  *        transform a DISTINCT or DISTINCT ON clause
1410  *
1411  * Since we may need to add items to the query's sortClause list, that list
1412  * is passed by reference.      Likewise for the targetlist.
1413  */
1414 List *
1415 transformDistinctClause(ParseState *pstate, List *distinctlist,
1416                                                 List **targetlist, List **sortClause)
1417 {
1418         List       *result = NIL;
1419         ListCell   *slitem;
1420         ListCell   *dlitem;
1421
1422         /* No work if there was no DISTINCT clause */
1423         if (distinctlist == NIL)
1424                 return NIL;
1425
1426         if (linitial(distinctlist) == NULL)
1427         {
1428                 /* We had SELECT DISTINCT */
1429
1430                 /*
1431                  * All non-resjunk elements from target list that are not already in
1432                  * the sort list should be added to it.  (We don't really care what
1433                  * order the DISTINCT fields are checked in, so we can leave the
1434                  * user's ORDER BY spec alone, and just add additional sort keys to it
1435                  * to ensure that all targetlist items get sorted.)
1436                  */
1437                 *sortClause = addAllTargetsToSortList(pstate,
1438                                                                                           *sortClause,
1439                                                                                           *targetlist,
1440                                                                                           true);
1441
1442                 /*
1443                  * Now, DISTINCT list consists of all non-resjunk sortlist items.
1444                  * Actually, all the sortlist items had better be non-resjunk!
1445                  * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item that
1446                  * does not appear anywhere in the SELECT targetlist, and we can't
1447                  * implement that with only one sorting pass...
1448                  */
1449                 foreach(slitem, *sortClause)
1450                 {
1451                         SortClause *scl = (SortClause *) lfirst(slitem);
1452                         TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
1453
1454                         if (tle->resjunk)
1455                                 ereport(ERROR,
1456                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1457                                                  errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list")));
1458                         else
1459                                 result = lappend(result, copyObject(scl));
1460                 }
1461         }
1462         else
1463         {
1464                 /* We had SELECT DISTINCT ON (expr, ...) */
1465
1466                 /*
1467                  * If the user writes both DISTINCT ON and ORDER BY, then the two
1468                  * expression lists must match (until one or the other runs out).
1469                  * Otherwise the ORDER BY requires a different sort order than the
1470                  * DISTINCT does, and we can't implement that with only one sort pass
1471                  * (and if we do two passes, the results will be rather
1472                  * unpredictable). However, it's OK to have more DISTINCT ON
1473                  * expressions than ORDER BY expressions; we can just add the extra
1474                  * DISTINCT values to the sort list, much as we did above for ordinary
1475                  * DISTINCT fields.
1476                  *
1477                  * Actually, it'd be OK for the common prefixes of the two lists to
1478                  * match in any order, but implementing that check seems like more
1479                  * trouble than it's worth.
1480                  */
1481                 ListCell   *nextsortlist = list_head(*sortClause);
1482
1483                 foreach(dlitem, distinctlist)
1484                 {
1485                         TargetEntry *tle;
1486
1487                         tle = findTargetlistEntry(pstate, lfirst(dlitem),
1488                                                                           targetlist, DISTINCT_ON_CLAUSE);
1489
1490                         if (nextsortlist != NULL)
1491                         {
1492                                 SortClause *scl = (SortClause *) lfirst(nextsortlist);
1493
1494                                 if (tle->ressortgroupref != scl->tleSortGroupRef)
1495                                         ereport(ERROR,
1496                                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1497                                                          errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
1498                                 result = lappend(result, copyObject(scl));
1499                                 nextsortlist = lnext(nextsortlist);
1500                         }
1501                         else
1502                         {
1503                                 *sortClause = addTargetToSortList(pstate, tle,
1504                                                                                                   *sortClause, *targetlist,
1505                                                                                                   SORTBY_ASC, NIL, true);
1506
1507                                 /*
1508                                  * Probably, the tle should always have been added at the end
1509                                  * of the sort list ... but search to be safe.
1510                                  */
1511                                 foreach(slitem, *sortClause)
1512                                 {
1513                                         SortClause *scl = (SortClause *) lfirst(slitem);
1514
1515                                         if (tle->ressortgroupref == scl->tleSortGroupRef)
1516                                         {
1517                                                 result = lappend(result, copyObject(scl));
1518                                                 break;
1519                                         }
1520                                 }
1521                                 if (slitem == NULL)             /* should not happen */
1522                                         elog(ERROR, "failed to add DISTINCT ON clause to target list");
1523                         }
1524                 }
1525         }
1526
1527         return result;
1528 }
1529
1530 /*
1531  * addAllTargetsToSortList
1532  *              Make sure all non-resjunk targets in the targetlist are in the
1533  *              ORDER BY list, adding the not-yet-sorted ones to the end of the list.
1534  *              This is typically used to help implement SELECT DISTINCT.
1535  *
1536  * See addTargetToSortList for info about pstate and resolveUnknown inputs.
1537  *
1538  * Returns the updated ORDER BY list.
1539  */
1540 List *
1541 addAllTargetsToSortList(ParseState *pstate, List *sortlist,
1542                                                 List *targetlist, bool resolveUnknown)
1543 {
1544         ListCell   *l;
1545
1546         foreach(l, targetlist)
1547         {
1548                 TargetEntry *tle = (TargetEntry *) lfirst(l);
1549
1550                 if (!tle->resjunk)
1551                         sortlist = addTargetToSortList(pstate, tle,
1552                                                                                    sortlist, targetlist,
1553                                                                                    SORTBY_ASC, NIL,
1554                                                                                    resolveUnknown);
1555         }
1556         return sortlist;
1557 }
1558
1559 /*
1560  * addTargetToSortList
1561  *              If the given targetlist entry isn't already in the ORDER BY list,
1562  *              add it to the end of the list, using the sortop with given name
1563  *              or the default sort operator if opname == NIL.
1564  *
1565  * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT.  If not,
1566  * do nothing (which implies the search for a sort operator will fail).
1567  * pstate should be provided if resolveUnknown is TRUE, but can be NULL
1568  * otherwise.
1569  *
1570  * Returns the updated ORDER BY list.
1571  */
1572 List *
1573 addTargetToSortList(ParseState *pstate, TargetEntry *tle,
1574                                         List *sortlist, List *targetlist,
1575                                         int sortby_kind, List *sortby_opname,
1576                                         bool resolveUnknown)
1577 {
1578         /* avoid making duplicate sortlist entries */
1579         if (!targetIsInSortList(tle, sortlist))
1580         {
1581                 SortClause *sortcl = makeNode(SortClause);
1582                 Oid                     restype = exprType((Node *) tle->expr);
1583
1584                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1585                 if (restype == UNKNOWNOID && resolveUnknown)
1586                 {
1587                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1588                                                                                          restype, TEXTOID, -1,
1589                                                                                          COERCION_IMPLICIT,
1590                                                                                          COERCE_IMPLICIT_CAST);
1591                         restype = TEXTOID;
1592                 }
1593
1594                 sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1595
1596                 switch (sortby_kind)
1597                 {
1598                         case SORTBY_ASC:
1599                                 sortcl->sortop = ordering_oper_opid(restype);
1600                                 break;
1601                         case SORTBY_DESC:
1602                                 sortcl->sortop = reverse_ordering_oper_opid(restype);
1603                                 break;
1604                         case SORTBY_USING:
1605                                 Assert(sortby_opname != NIL);
1606                                 sortcl->sortop = compatible_oper_opid(sortby_opname,
1607                                                                                                           restype,
1608                                                                                                           restype,
1609                                                                                                           false);
1610                                 break;
1611                         default:
1612                                 elog(ERROR, "unrecognized sortby_kind: %d", sortby_kind);
1613                                 break;
1614                 }
1615
1616                 sortlist = lappend(sortlist, sortcl);
1617         }
1618         return sortlist;
1619 }
1620
1621 /*
1622  * assignSortGroupRef
1623  *        Assign the targetentry an unused ressortgroupref, if it doesn't
1624  *        already have one.  Return the assigned or pre-existing refnumber.
1625  *
1626  * 'tlist' is the targetlist containing (or to contain) the given targetentry.
1627  */
1628 Index
1629 assignSortGroupRef(TargetEntry *tle, List *tlist)
1630 {
1631         Index           maxRef;
1632         ListCell   *l;
1633
1634         if (tle->ressortgroupref)       /* already has one? */
1635                 return tle->ressortgroupref;
1636
1637         /* easiest way to pick an unused refnumber: max used + 1 */
1638         maxRef = 0;
1639         foreach(l, tlist)
1640         {
1641                 Index           ref = ((TargetEntry *) lfirst(l))->ressortgroupref;
1642
1643                 if (ref > maxRef)
1644                         maxRef = ref;
1645         }
1646         tle->ressortgroupref = maxRef + 1;
1647         return tle->ressortgroupref;
1648 }
1649
1650 /*
1651  * targetIsInSortList
1652  *              Is the given target item already in the sortlist?
1653  *
1654  * Works for both SortClause and GroupClause lists.  Note that the main
1655  * reason we need this routine (and not just a quick test for nonzeroness
1656  * of ressortgroupref) is that a TLE might be in only one of the lists.
1657  */
1658 bool
1659 targetIsInSortList(TargetEntry *tle, List *sortList)
1660 {
1661         Index           ref = tle->ressortgroupref;
1662         ListCell   *l;
1663
1664         /* no need to scan list if tle has no marker */
1665         if (ref == 0)
1666                 return false;
1667
1668         foreach(l, sortList)
1669         {
1670                 SortClause *scl = (SortClause *) lfirst(l);
1671
1672                 if (scl->tleSortGroupRef == ref)
1673                         return true;
1674         }
1675         return false;
1676 }