granicus.if.org Git - postgresql/blob - src/backend/parser/parse_clause.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * parse_clause.c
   4  *        handle clauses in parser
   5  *
   6  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.160 2006/12/24 00:29:19 tgl Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include "access/heapam.h"
  19 #include "catalog/heap.h"
  20 #include "catalog/pg_type.h"
  21 #include "commands/defrem.h"
  22 #include "nodes/makefuncs.h"
  23 #include "optimizer/clauses.h"
  24 #include "optimizer/tlist.h"
  25 #include "optimizer/var.h"
  26 #include "parser/analyze.h"
  27 #include "parser/parsetree.h"
  28 #include "parser/parse_clause.h"
  29 #include "parser/parse_coerce.h"
  30 #include "parser/parse_expr.h"
  31 #include "parser/parse_oper.h"
  32 #include "parser/parse_relation.h"
  33 #include "parser/parse_target.h"
  34 #include "rewrite/rewriteManip.h"
  35 #include "utils/guc.h"
  36
  37
  38 #define ORDER_CLAUSE 0
  39 #define GROUP_CLAUSE 1
  40 #define DISTINCT_ON_CLAUSE 2
  41
  42 static char *clauseText[] = {"ORDER BY", "GROUP BY", "DISTINCT ON"};
  43
  44 static void extractRemainingColumns(List *common_colnames,
  45                                                 List *src_colnames, List *src_colvars,
  46                                                 List **res_colnames, List **res_colvars);
  47 static Node *transformJoinUsingClause(ParseState *pstate,
  48                                                  List *leftVars, List *rightVars);
  49 static Node *transformJoinOnClause(ParseState *pstate, JoinExpr *j,
  50                                           RangeTblEntry *l_rte,
  51                                           RangeTblEntry *r_rte,
  52                                           List *relnamespace,
  53                                           Relids containedRels);
  54 static RangeTblEntry *transformTableEntry(ParseState *pstate, RangeVar *r);
  55 static RangeTblEntry *transformRangeSubselect(ParseState *pstate,
  56                                                 RangeSubselect *r);
  57 static RangeTblEntry *transformRangeFunction(ParseState *pstate,
  58                                            RangeFunction *r);
  59 static Node *transformFromClauseItem(ParseState *pstate, Node *n,
  60                                                 RangeTblEntry **top_rte, int *top_rti,
  61                                                 List **relnamespace,
  62                                                 Relids *containedRels);
  63 static Node *buildMergedJoinVar(ParseState *pstate, JoinType jointype,
  64                                    Var *l_colvar, Var *r_colvar);
  65 static TargetEntry *findTargetlistEntry(ParseState *pstate, Node *node,
  66                                         List **tlist, int clause);
  67
  68
  69 /*
  70  * transformFromClause -
  71  *        Process the FROM clause and add items to the query's range table,
  72  *        joinlist, and namespaces.
  73  *
  74  * Note: we assume that pstate's p_rtable, p_joinlist, p_relnamespace, and
  75  * p_varnamespace lists were initialized to NIL when the pstate was created.
  76  * We will add onto any entries already present --- this is needed for rule
  77  * processing, as well as for UPDATE and DELETE.
  78  *
  79  * The range table may grow still further when we transform the expressions
  80  * in the query's quals and target list. (This is possible because in
  81  * POSTQUEL, we allowed references to relations not specified in the
  82  * from-clause.  PostgreSQL keeps this extension to standard SQL.)
  83  */
  84 void
  85 transformFromClause(ParseState *pstate, List *frmList)
  86 {
  87         ListCell   *fl;
  88
  89         /*
  90          * The grammar will have produced a list of RangeVars, RangeSubselects,
  91          * RangeFunctions, and/or JoinExprs. Transform each one (possibly adding
  92          * entries to the rtable), check for duplicate refnames, and then add it
  93          * to the joinlist and namespaces.
  94          */
  95         foreach(fl, frmList)
  96         {
  97                 Node       *n = lfirst(fl);
  98                 RangeTblEntry *rte;
  99                 int                     rtindex;
 100                 List       *relnamespace;
 101                 Relids          containedRels;
 102
 103                 n = transformFromClauseItem(pstate, n,
 104                                                                         &rte,
 105                                                                         &rtindex,
 106                                                                         &relnamespace,
 107                                                                         &containedRels);
 108                 checkNameSpaceConflicts(pstate, pstate->p_relnamespace, relnamespace);
 109                 pstate->p_joinlist = lappend(pstate->p_joinlist, n);
 110                 pstate->p_relnamespace = list_concat(pstate->p_relnamespace,
 111                                                                                          relnamespace);
 112                 pstate->p_varnamespace = lappend(pstate->p_varnamespace, rte);
 113                 bms_free(containedRels);
 114         }
 115 }
 116
 117 /*
 118  * setTargetTable
 119  *        Add the target relation of INSERT/UPDATE/DELETE to the range table,
 120  *        and make the special links to it in the ParseState.
 121  *
 122  *        We also open the target relation and acquire a write lock on it.
 123  *        This must be done before processing the FROM list, in case the target
 124  *        is also mentioned as a source relation --- we want to be sure to grab
 125  *        the write lock before any read lock.
 126  *
 127  *        If alsoSource is true, add the target to the query's joinlist and
 128  *        namespace.  For INSERT, we don't want the target to be joined to;
 129  *        it's a destination of tuples, not a source.   For UPDATE/DELETE,
 130  *        we do need to scan or join the target.  (NOTE: we do not bother
 131  *        to check for namespace conflict; we assume that the namespace was
 132  *        initially empty in these cases.)
 133  *
 134  *        Finally, we mark the relation as requiring the permissions specified
 135  *        by requiredPerms.
 136  *
 137  *        Returns the rangetable index of the target relation.
 138  */
 139 int
 140 setTargetTable(ParseState *pstate, RangeVar *relation,
 141                            bool inh, bool alsoSource, AclMode requiredPerms)
 142 {
 143         RangeTblEntry *rte;
 144         int                     rtindex;
 145
 146         /* Close old target; this could only happen for multi-action rules */
 147         if (pstate->p_target_relation != NULL)
 148                 heap_close(pstate->p_target_relation, NoLock);
 149
 150         /*
 151          * Open target rel and grab suitable lock (which we will hold till end of
 152          * transaction).
 153          *
 154          * analyze.c will eventually do the corresponding heap_close(), but *not*
 155          * release the lock.
 156          */
 157         pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
 158
 159         /*
 160          * Now build an RTE.
 161          */
 162         rte = addRangeTableEntryForRelation(pstate, pstate->p_target_relation,
 163                                                                                 relation->alias, inh, false);
 164         pstate->p_target_rangetblentry = rte;
 165
 166         /* assume new rte is at end */
 167         rtindex = list_length(pstate->p_rtable);
 168         Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 169
 170         /*
 171          * Override addRangeTableEntry's default ACL_SELECT permissions check, and
 172          * instead mark target table as requiring exactly the specified
 173          * permissions.
 174          *
 175          * If we find an explicit reference to the rel later during parse
 176          * analysis, we will add the ACL_SELECT bit back again; see
 177          * scanRTEForColumn (for simple field references), ExpandColumnRefStar
 178          * (for foo.*) and ExpandAllTables (for *).
 179          */
 180         rte->requiredPerms = requiredPerms;
 181
 182         /*
 183          * If UPDATE/DELETE, add table to joinlist and namespaces.
 184          */
 185         if (alsoSource)
 186                 addRTEtoQuery(pstate, rte, true, true, true);
 187
 188         return rtindex;
 189 }
 190
 191 /*
 192  * Simplify InhOption (yes/no/default) into boolean yes/no.
 193  *
 194  * The reason we do things this way is that we don't want to examine the
 195  * SQL_inheritance option flag until parse_analyze is run.      Otherwise,
 196  * we'd do the wrong thing with query strings that intermix SET commands
 197  * with queries.
 198  */
 199 bool
 200 interpretInhOption(InhOption inhOpt)
 201 {
 202         switch (inhOpt)
 203         {
 204                 case INH_NO:
 205                         return false;
 206                 case INH_YES:
 207                         return true;
 208                 case INH_DEFAULT:
 209                         return SQL_inheritance;
 210         }
 211         elog(ERROR, "bogus InhOption value: %d", inhOpt);
 212         return false;                           /* keep compiler quiet */
 213 }
 214
 215 /*
 216  * Given a relation-options list (of DefElems), return true iff the specified
 217  * table/result set should be created with OIDs. This needs to be done after
 218  * parsing the query string because the return value can depend upon the
 219  * default_with_oids GUC var.
 220  */
 221 bool
 222 interpretOidsOption(List *defList)
 223 {
 224         ListCell   *cell;
 225
 226         /* Scan list to see if OIDS was included */
 227         foreach(cell, defList)
 228         {
 229                 DefElem    *def = (DefElem *) lfirst(cell);
 230
 231                 if (pg_strcasecmp(def->defname, "oids") == 0)
 232                         return defGetBoolean(def);
 233         }
 234
 235         /* OIDS option was not specified, so use default. */
 236         return default_with_oids;
 237 }
 238
 239 /*
 240  * Extract all not-in-common columns from column lists of a source table
 241  */
 242 static void
 243 extractRemainingColumns(List *common_colnames,
 244                                                 List *src_colnames, List *src_colvars,
 245                                                 List **res_colnames, List **res_colvars)
 246 {
 247         List       *new_colnames = NIL;
 248         List       *new_colvars = NIL;
 249         ListCell   *lnames,
 250                            *lvars;
 251
 252         Assert(list_length(src_colnames) == list_length(src_colvars));
 253
 254         forboth(lnames, src_colnames, lvars, src_colvars)
 255         {
 256                 char       *colname = strVal(lfirst(lnames));
 257                 bool            match = false;
 258                 ListCell   *cnames;
 259
 260                 foreach(cnames, common_colnames)
 261                 {
 262                         char       *ccolname = strVal(lfirst(cnames));
 263
 264                         if (strcmp(colname, ccolname) == 0)
 265                         {
 266                                 match = true;
 267                                 break;
 268                         }
 269                 }
 270
 271                 if (!match)
 272                 {
 273                         new_colnames = lappend(new_colnames, lfirst(lnames));
 274                         new_colvars = lappend(new_colvars, lfirst(lvars));
 275                 }
 276         }
 277
 278         *res_colnames = new_colnames;
 279         *res_colvars = new_colvars;
 280 }
 281
 282 /* transformJoinUsingClause()
 283  *        Build a complete ON clause from a partially-transformed USING list.
 284  *        We are given lists of nodes representing left and right match columns.
 285  *        Result is a transformed qualification expression.
 286  */
 287 static Node *
 288 transformJoinUsingClause(ParseState *pstate, List *leftVars, List *rightVars)
 289 {
 290         Node       *result = NULL;
 291         ListCell   *lvars,
 292                            *rvars;
 293
 294         /*
 295          * We cheat a little bit here by building an untransformed operator tree
 296          * whose leaves are the already-transformed Vars.  This is OK because
 297          * transformExpr() won't complain about already-transformed subnodes.
 298          */
 299         forboth(lvars, leftVars, rvars, rightVars)
 300         {
 301                 Node       *lvar = (Node *) lfirst(lvars);
 302                 Node       *rvar = (Node *) lfirst(rvars);
 303                 A_Expr     *e;
 304
 305                 e = makeSimpleA_Expr(AEXPR_OP, "=",
 306                                                          copyObject(lvar), copyObject(rvar),
 307                                                          -1);
 308
 309                 if (result == NULL)
 310                         result = (Node *) e;
 311                 else
 312                 {
 313                         A_Expr     *a;
 314
 315                         a = makeA_Expr(AEXPR_AND, NIL, result, (Node *) e, -1);
 316                         result = (Node *) a;
 317                 }
 318         }
 319
 320         /*
 321          * Since the references are already Vars, and are certainly from the input
 322          * relations, we don't have to go through the same pushups that
 323          * transformJoinOnClause() does.  Just invoke transformExpr() to fix up
 324          * the operators, and we're done.
 325          */
 326         result = transformExpr(pstate, result);
 327
 328         result = coerce_to_boolean(pstate, result, "JOIN/USING");
 329
 330         return result;
 331 }
 332
 333 /* transformJoinOnClause()
 334  *        Transform the qual conditions for JOIN/ON.
 335  *        Result is a transformed qualification expression.
 336  */
 337 static Node *
 338 transformJoinOnClause(ParseState *pstate, JoinExpr *j,
 339                                           RangeTblEntry *l_rte,
 340                                           RangeTblEntry *r_rte,
 341                                           List *relnamespace,
 342                                           Relids containedRels)
 343 {
 344         Node       *result;
 345         List       *save_relnamespace;
 346         List       *save_varnamespace;
 347         Relids          clause_varnos;
 348         int                     varno;
 349
 350         /*
 351          * This is a tad tricky, for two reasons.  First, the namespace that the
 352          * join expression should see is just the two subtrees of the JOIN plus
 353          * any outer references from upper pstate levels.  So, temporarily set
 354          * this pstate's namespace accordingly.  (We need not check for refname
 355          * conflicts, because transformFromClauseItem() already did.) NOTE: this
 356          * code is OK only because the ON clause can't legally alter the namespace
 357          * by causing implicit relation refs to be added.
 358          */
 359         save_relnamespace = pstate->p_relnamespace;
 360         save_varnamespace = pstate->p_varnamespace;
 361
 362         pstate->p_relnamespace = relnamespace;
 363         pstate->p_varnamespace = list_make2(l_rte, r_rte);
 364
 365         result = transformWhereClause(pstate, j->quals, "JOIN/ON");
 366
 367         pstate->p_relnamespace = save_relnamespace;
 368         pstate->p_varnamespace = save_varnamespace;
 369
 370         /*
 371          * Second, we need to check that the ON condition doesn't refer to any
 372          * rels outside the input subtrees of the JOIN.  It could do that despite
 373          * our hack on the namespace if it uses fully-qualified names. So, grovel
 374          * through the transformed clause and make sure there are no bogus
 375          * references.  (Outer references are OK, and are ignored here.)
 376          */
 377         clause_varnos = pull_varnos(result);
 378         clause_varnos = bms_del_members(clause_varnos, containedRels);
 379         if ((varno = bms_first_member(clause_varnos)) >= 0)
 380         {
 381                 ereport(ERROR,
 382                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 383                  errmsg("JOIN/ON clause refers to \"%s\", which is not part of JOIN",
 384                                 rt_fetch(varno, pstate->p_rtable)->eref->aliasname)));
 385         }
 386         bms_free(clause_varnos);
 387
 388         return result;
 389 }
 390
 391 /*
 392  * transformTableEntry --- transform a RangeVar (simple relation reference)
 393  */
 394 static RangeTblEntry *
 395 transformTableEntry(ParseState *pstate, RangeVar *r)
 396 {
 397         RangeTblEntry *rte;
 398
 399         /*
 400          * mark this entry to indicate it comes from the FROM clause. In SQL, the
 401          * target list can only refer to range variables specified in the from
 402          * clause but we follow the more powerful POSTQUEL semantics and
 403          * automatically generate the range variable if not specified. However
 404          * there are times we need to know whether the entries are legitimate.
 405          */
 406         rte = addRangeTableEntry(pstate, r, r->alias,
 407                                                          interpretInhOption(r->inhOpt), true);
 408
 409         return rte;
 410 }
 411
 412
 413 /*
 414  * transformRangeSubselect --- transform a sub-SELECT appearing in FROM
 415  */
 416 static RangeTblEntry *
 417 transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
 418 {
 419         List       *parsetrees;
 420         Query      *query;
 421         RangeTblEntry *rte;
 422
 423         /*
 424          * We require user to supply an alias for a subselect, per SQL92. To relax
 425          * this, we'd have to be prepared to gin up a unique alias for an
 426          * unlabeled subselect.
 427          */
 428         if (r->alias == NULL)
 429                 ereport(ERROR,
 430                                 (errcode(ERRCODE_SYNTAX_ERROR),
 431                                  errmsg("subquery in FROM must have an alias")));
 432
 433         /*
 434          * Analyze and transform the subquery.
 435          */
 436         parsetrees = parse_sub_analyze(r->subquery, pstate);
 437
 438         /*
 439          * Check that we got something reasonable.      Most of these conditions are
 440          * probably impossible given restrictions of the grammar, but check 'em
 441          * anyway.
 442          */
 443         if (list_length(parsetrees) != 1)
 444                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
 445         query = (Query *) linitial(parsetrees);
 446         if (query == NULL || !IsA(query, Query))
 447                 elog(ERROR, "unexpected parse analysis result for subquery in FROM");
 448
 449         if (query->commandType != CMD_SELECT)
 450                 elog(ERROR, "expected SELECT query from subquery in FROM");
 451         if (query->into != NULL)
 452                 ereport(ERROR,
 453                                 (errcode(ERRCODE_SYNTAX_ERROR),
 454                                  errmsg("subquery in FROM may not have SELECT INTO")));
 455
 456         /*
 457          * The subquery cannot make use of any variables from FROM items created
 458          * earlier in the current query.  Per SQL92, the scope of a FROM item does
 459          * not include other FROM items.  Formerly we hacked the namespace so that
 460          * the other variables weren't even visible, but it seems more useful to
 461          * leave them visible and give a specific error message.
 462          *
 463          * XXX this will need further work to support SQL99's LATERAL() feature,
 464          * wherein such references would indeed be legal.
 465          *
 466          * We can skip groveling through the subquery if there's not anything
 467          * visible in the current query.  Also note that outer references are OK.
 468          */
 469         if (pstate->p_relnamespace || pstate->p_varnamespace)
 470         {
 471                 if (contain_vars_of_level((Node *) query, 1))
 472                         ereport(ERROR,
 473                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 474                                          errmsg("subquery in FROM may not refer to other relations of same query level")));
 475         }
 476
 477         /*
 478          * OK, build an RTE for the subquery.
 479          */
 480         rte = addRangeTableEntryForSubquery(pstate, query, r->alias, true);
 481
 482         return rte;
 483 }
 484
 485
 486 /*
 487  * transformRangeFunction --- transform a function call appearing in FROM
 488  */
 489 static RangeTblEntry *
 490 transformRangeFunction(ParseState *pstate, RangeFunction *r)
 491 {
 492         Node       *funcexpr;
 493         char       *funcname;
 494         RangeTblEntry *rte;
 495
 496         /*
 497          * Get function name for possible use as alias.  We use the same
 498          * transformation rules as for a SELECT output expression.      For a FuncCall
 499          * node, the result will be the function name, but it is possible for the
 500          * grammar to hand back other node types.
 501          */
 502         funcname = FigureColname(r->funccallnode);
 503
 504         /*
 505          * Transform the raw expression.
 506          */
 507         funcexpr = transformExpr(pstate, r->funccallnode);
 508
 509         /*
 510          * The function parameters cannot make use of any variables from other
 511          * FROM items.  (Compare to transformRangeSubselect(); the coding is
 512          * different though because we didn't parse as a sub-select with its own
 513          * level of namespace.)
 514          *
 515          * XXX this will need further work to support SQL99's LATERAL() feature,
 516          * wherein such references would indeed be legal.
 517          */
 518         if (pstate->p_relnamespace || pstate->p_varnamespace)
 519         {
 520                 if (contain_vars_of_level(funcexpr, 0))
 521                         ereport(ERROR,
 522                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
 523                                          errmsg("function expression in FROM may not refer to other relations of same query level")));
 524         }
 525
 526         /*
 527          * Disallow aggregate functions in the expression.      (No reason to postpone
 528          * this check until parseCheckAggregates.)
 529          */
 530         if (pstate->p_hasAggs)
 531         {
 532                 if (checkExprHasAggs(funcexpr))
 533                         ereport(ERROR,
 534                                         (errcode(ERRCODE_GROUPING_ERROR),
 535                                          errmsg("cannot use aggregate function in function expression in FROM")));
 536         }
 537
 538         /*
 539          * OK, build an RTE for the function.
 540          */
 541         rte = addRangeTableEntryForFunction(pstate, funcname, funcexpr,
 542                                                                                 r, true);
 543
 544         /*
 545          * If a coldeflist was supplied, ensure it defines a legal set of names
 546          * (no duplicates) and datatypes (no pseudo-types, for instance).
 547          * addRangeTableEntryForFunction looked up the type names but didn't check
 548          * them further than that.
 549          */
 550         if (r->coldeflist)
 551         {
 552                 TupleDesc       tupdesc;
 553
 554                 tupdesc = BuildDescFromLists(rte->eref->colnames,
 555                                                                          rte->funccoltypes,
 556                                                                          rte->funccoltypmods);
 557                 CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE);
 558         }
 559
 560         return rte;
 561 }
 562
 563
 564 /*
 565  * transformFromClauseItem -
 566  *        Transform a FROM-clause item, adding any required entries to the
 567  *        range table list being built in the ParseState, and return the
 568  *        transformed item ready to include in the joinlist and namespaces.
 569  *        This routine can recurse to handle SQL92 JOIN expressions.
 570  *
 571  * The function return value is the node to add to the jointree (a
 572  * RangeTblRef or JoinExpr).  Additional output parameters are:
 573  *
 574  * *top_rte: receives the RTE corresponding to the jointree item.
 575  * (We could extract this from the function return node, but it saves cycles
 576  * to pass it back separately.)
 577  *
 578  * *top_rti: receives the rangetable index of top_rte.  (Ditto.)
 579  *
 580  * *relnamespace: receives a List of the RTEs exposed as relation names
 581  * by this item.
 582  *
 583  * *containedRels: receives a bitmap set of the rangetable indexes
 584  * of all the base and join relations represented in this jointree item.
 585  * This is needed for checking JOIN/ON conditions in higher levels.
 586  *
 587  * We do not need to pass back an explicit varnamespace value, because
 588  * in all cases the varnamespace contribution is exactly top_rte.
 589  */
 590 static Node *
 591 transformFromClauseItem(ParseState *pstate, Node *n,
 592                                                 RangeTblEntry **top_rte, int *top_rti,
 593                                                 List **relnamespace,
 594                                                 Relids *containedRels)
 595 {
 596         if (IsA(n, RangeVar))
 597         {
 598                 /* Plain relation reference */
 599                 RangeTblRef *rtr;
 600                 RangeTblEntry *rte;
 601                 int                     rtindex;
 602
 603                 rte = transformTableEntry(pstate, (RangeVar *) n);
 604                 /* assume new rte is at end */
 605                 rtindex = list_length(pstate->p_rtable);
 606                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 607                 *top_rte = rte;
 608                 *top_rti = rtindex;
 609                 *relnamespace = list_make1(rte);
 610                 *containedRels = bms_make_singleton(rtindex);
 611                 rtr = makeNode(RangeTblRef);
 612                 rtr->rtindex = rtindex;
 613                 return (Node *) rtr;
 614         }
 615         else if (IsA(n, RangeSubselect))
 616         {
 617                 /* sub-SELECT is like a plain relation */
 618                 RangeTblRef *rtr;
 619                 RangeTblEntry *rte;
 620                 int                     rtindex;
 621
 622                 rte = transformRangeSubselect(pstate, (RangeSubselect *) n);
 623                 /* assume new rte is at end */
 624                 rtindex = list_length(pstate->p_rtable);
 625                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 626                 *top_rte = rte;
 627                 *top_rti = rtindex;
 628                 *relnamespace = list_make1(rte);
 629                 *containedRels = bms_make_singleton(rtindex);
 630                 rtr = makeNode(RangeTblRef);
 631                 rtr->rtindex = rtindex;
 632                 return (Node *) rtr;
 633         }
 634         else if (IsA(n, RangeFunction))
 635         {
 636                 /* function is like a plain relation */
 637                 RangeTblRef *rtr;
 638                 RangeTblEntry *rte;
 639                 int                     rtindex;
 640
 641                 rte = transformRangeFunction(pstate, (RangeFunction *) n);
 642                 /* assume new rte is at end */
 643                 rtindex = list_length(pstate->p_rtable);
 644                 Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
 645                 *top_rte = rte;
 646                 *top_rti = rtindex;
 647                 *relnamespace = list_make1(rte);
 648                 *containedRels = bms_make_singleton(rtindex);
 649                 rtr = makeNode(RangeTblRef);
 650                 rtr->rtindex = rtindex;
 651                 return (Node *) rtr;
 652         }
 653         else if (IsA(n, JoinExpr))
 654         {
 655                 /* A newfangled join expression */
 656                 JoinExpr   *j = (JoinExpr *) n;
 657                 RangeTblEntry *l_rte;
 658                 RangeTblEntry *r_rte;
 659                 int                     l_rtindex;
 660                 int                     r_rtindex;
 661                 Relids          l_containedRels,
 662                                         r_containedRels,
 663                                         my_containedRels;
 664                 List       *l_relnamespace,
 665                                    *r_relnamespace,
 666                                    *my_relnamespace,
 667                                    *l_colnames,
 668                                    *r_colnames,
 669                                    *res_colnames,
 670                                    *l_colvars,
 671                                    *r_colvars,
 672                                    *res_colvars;
 673                 RangeTblEntry *rte;
 674
 675                 /*
 676                  * Recursively process the left and right subtrees
 677                  */
 678                 j->larg = transformFromClauseItem(pstate, j->larg,
 679                                                                                   &l_rte,
 680                                                                                   &l_rtindex,
 681                                                                                   &l_relnamespace,
 682                                                                                   &l_containedRels);
 683                 j->rarg = transformFromClauseItem(pstate, j->rarg,
 684                                                                                   &r_rte,
 685                                                                                   &r_rtindex,
 686                                                                                   &r_relnamespace,
 687                                                                                   &r_containedRels);
 688
 689                 /*
 690                  * Check for conflicting refnames in left and right subtrees. Must do
 691                  * this because higher levels will assume I hand back a self-
 692                  * consistent namespace subtree.
 693                  */
 694                 checkNameSpaceConflicts(pstate, l_relnamespace, r_relnamespace);
 695
 696                 /*
 697                  * Generate combined relation membership info for possible use by
 698                  * transformJoinOnClause below.
 699                  */
 700                 my_relnamespace = list_concat(l_relnamespace, r_relnamespace);
 701                 my_containedRels = bms_join(l_containedRels, r_containedRels);
 702
 703                 pfree(r_relnamespace);  /* free unneeded list header */
 704
 705                 /*
 706                  * Extract column name and var lists from both subtrees
 707                  *
 708                  * Note: expandRTE returns new lists, safe for me to modify
 709                  */
 710                 expandRTE(l_rte, l_rtindex, 0, false,
 711                                   &l_colnames, &l_colvars);
 712                 expandRTE(r_rte, r_rtindex, 0, false,
 713                                   &r_colnames, &r_colvars);
 714
 715                 /*
 716                  * Natural join does not explicitly specify columns; must generate
 717                  * columns to join. Need to run through the list of columns from each
 718                  * table or join result and match up the column names. Use the first
 719                  * table, and check every column in the second table for a match.
 720                  * (We'll check that the matches were unique later on.) The result of
 721                  * this step is a list of column names just like an explicitly-written
 722                  * USING list.
 723                  */
 724                 if (j->isNatural)
 725                 {
 726                         List       *rlist = NIL;
 727                         ListCell   *lx,
 728                                            *rx;
 729
 730                         Assert(j->using == NIL);        /* shouldn't have USING() too */
 731
 732                         foreach(lx, l_colnames)
 733                         {
 734                                 char       *l_colname = strVal(lfirst(lx));
 735                                 Value      *m_name = NULL;
 736
 737                                 foreach(rx, r_colnames)
 738                                 {
 739                                         char       *r_colname = strVal(lfirst(rx));
 740
 741                                         if (strcmp(l_colname, r_colname) == 0)
 742                                         {
 743                                                 m_name = makeString(l_colname);
 744                                                 break;
 745                                         }
 746                                 }
 747
 748                                 /* matched a right column? then keep as join column... */
 749                                 if (m_name != NULL)
 750                                         rlist = lappend(rlist, m_name);
 751                         }
 752
 753                         j->using = rlist;
 754                 }
 755
 756                 /*
 757                  * Now transform the join qualifications, if any.
 758                  */
 759                 res_colnames = NIL;
 760                 res_colvars = NIL;
 761
 762                 if (j->using)
 763                 {
 764                         /*
 765                          * JOIN/USING (or NATURAL JOIN, as transformed above). Transform
 766                          * the list into an explicit ON-condition, and generate a list of
 767                          * merged result columns.
 768                          */
 769                         List       *ucols = j->using;
 770                         List       *l_usingvars = NIL;
 771                         List       *r_usingvars = NIL;
 772                         ListCell   *ucol;
 773
 774                         Assert(j->quals == NULL);       /* shouldn't have ON() too */
 775
 776                         foreach(ucol, ucols)
 777                         {
 778                                 char       *u_colname = strVal(lfirst(ucol));
 779                                 ListCell   *col;
 780                                 int                     ndx;
 781                                 int                     l_index = -1;
 782                                 int                     r_index = -1;
 783                                 Var                *l_colvar,
 784                                                    *r_colvar;
 785
 786                                 /* Check for USING(foo,foo) */
 787                                 foreach(col, res_colnames)
 788                                 {
 789                                         char       *res_colname = strVal(lfirst(col));
 790
 791                                         if (strcmp(res_colname, u_colname) == 0)
 792                                                 ereport(ERROR,
 793                                                                 (errcode(ERRCODE_DUPLICATE_COLUMN),
 794                                                                  errmsg("column name \"%s\" appears more than once in USING clause",
 795                                                                                 u_colname)));
 796                                 }
 797
 798                                 /* Find it in left input */
 799                                 ndx = 0;
 800                                 foreach(col, l_colnames)
 801                                 {
 802                                         char       *l_colname = strVal(lfirst(col));
 803
 804                                         if (strcmp(l_colname, u_colname) == 0)
 805                                         {
 806                                                 if (l_index >= 0)
 807                                                         ereport(ERROR,
 808                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
 809                                                                          errmsg("common column name \"%s\" appears more than once in left table",
 810                                                                                         u_colname)));
 811                                                 l_index = ndx;
 812                                         }
 813                                         ndx++;
 814                                 }
 815                                 if (l_index < 0)
 816                                         ereport(ERROR,
 817                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 818                                                          errmsg("column \"%s\" specified in USING clause does not exist in left table",
 819                                                                         u_colname)));
 820
 821                                 /* Find it in right input */
 822                                 ndx = 0;
 823                                 foreach(col, r_colnames)
 824                                 {
 825                                         char       *r_colname = strVal(lfirst(col));
 826
 827                                         if (strcmp(r_colname, u_colname) == 0)
 828                                         {
 829                                                 if (r_index >= 0)
 830                                                         ereport(ERROR,
 831                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
 832                                                                          errmsg("common column name \"%s\" appears more than once in right table",
 833                                                                                         u_colname)));
 834                                                 r_index = ndx;
 835                                         }
 836                                         ndx++;
 837                                 }
 838                                 if (r_index < 0)
 839                                         ereport(ERROR,
 840                                                         (errcode(ERRCODE_UNDEFINED_COLUMN),
 841                                                          errmsg("column \"%s\" specified in USING clause does not exist in right table",
 842                                                                         u_colname)));
 843
 844                                 l_colvar = list_nth(l_colvars, l_index);
 845                                 l_usingvars = lappend(l_usingvars, l_colvar);
 846                                 r_colvar = list_nth(r_colvars, r_index);
 847                                 r_usingvars = lappend(r_usingvars, r_colvar);
 848
 849                                 res_colnames = lappend(res_colnames, lfirst(ucol));
 850                                 res_colvars = lappend(res_colvars,
 851                                                                           buildMergedJoinVar(pstate,
 852                                                                                                                  j->jointype,
 853                                                                                                                  l_colvar,
 854                                                                                                                  r_colvar));
 855                         }
 856
 857                         j->quals = transformJoinUsingClause(pstate,
 858                                                                                                 l_usingvars,
 859                                                                                                 r_usingvars);
 860                 }
 861                 else if (j->quals)
 862                 {
 863                         /* User-written ON-condition; transform it */
 864                         j->quals = transformJoinOnClause(pstate, j,
 865                                                                                          l_rte, r_rte,
 866                                                                                          my_relnamespace,
 867                                                                                          my_containedRels);
 868                 }
 869                 else
 870                 {
 871                         /* CROSS JOIN: no quals */
 872                 }
 873
 874                 /* Add remaining columns from each side to the output columns */
 875                 extractRemainingColumns(res_colnames,
 876                                                                 l_colnames, l_colvars,
 877                                                                 &l_colnames, &l_colvars);
 878                 extractRemainingColumns(res_colnames,
 879                                                                 r_colnames, r_colvars,
 880                                                                 &r_colnames, &r_colvars);
 881                 res_colnames = list_concat(res_colnames, l_colnames);
 882                 res_colvars = list_concat(res_colvars, l_colvars);
 883                 res_colnames = list_concat(res_colnames, r_colnames);
 884                 res_colvars = list_concat(res_colvars, r_colvars);
 885
 886                 /*
 887                  * Check alias (AS clause), if any.
 888                  */
 889                 if (j->alias)
 890                 {
 891                         if (j->alias->colnames != NIL)
 892                         {
 893                                 if (list_length(j->alias->colnames) > list_length(res_colnames))
 894                                         ereport(ERROR,
 895                                                         (errcode(ERRCODE_SYNTAX_ERROR),
 896                                                          errmsg("column alias list for \"%s\" has too many entries",
 897                                                                         j->alias->aliasname)));
 898                         }
 899                 }
 900
 901                 /*
 902                  * Now build an RTE for the result of the join
 903                  */
 904                 rte = addRangeTableEntryForJoin(pstate,
 905                                                                                 res_colnames,
 906                                                                                 j->jointype,
 907                                                                                 res_colvars,
 908                                                                                 j->alias,
 909                                                                                 true);
 910
 911                 /* assume new rte is at end */
 912                 j->rtindex = list_length(pstate->p_rtable);
 913                 Assert(rte == rt_fetch(j->rtindex, pstate->p_rtable));
 914
 915                 *top_rte = rte;
 916                 *top_rti = j->rtindex;
 917
 918                 /*
 919                  * Prepare returned namespace list.  If the JOIN has an alias then it
 920                  * hides the contained RTEs as far as the relnamespace goes;
 921                  * otherwise, put the contained RTEs and *not* the JOIN into
 922                  * relnamespace.
 923                  */
 924                 if (j->alias)
 925                 {
 926                         *relnamespace = list_make1(rte);
 927                         list_free(my_relnamespace);
 928                 }
 929                 else
 930                         *relnamespace = my_relnamespace;
 931
 932                 /*
 933                  * Include join RTE in returned containedRels set
 934                  */
 935                 *containedRels = bms_add_member(my_containedRels, j->rtindex);
 936
 937                 return (Node *) j;
 938         }
 939         else
 940                 elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
 941         return NULL;                            /* can't get here, keep compiler quiet */
 942 }
 943
 944 /*
 945  * buildMergedJoinVar -
 946  *        generate a suitable replacement expression for a merged join column
 947  */
 948 static Node *
 949 buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 950                                    Var *l_colvar, Var *r_colvar)
 951 {
 952         Oid                     outcoltype;
 953         int32           outcoltypmod;
 954         Node       *l_node,
 955                            *r_node,
 956                            *res_node;
 957
 958         /*
 959          * Choose output type if input types are dissimilar.
 960          */
 961         outcoltype = l_colvar->vartype;
 962         outcoltypmod = l_colvar->vartypmod;
 963         if (outcoltype != r_colvar->vartype)
 964         {
 965                 outcoltype = select_common_type(list_make2_oid(l_colvar->vartype,
 966                                                                                                            r_colvar->vartype),
 967                                                                                 "JOIN/USING");
 968                 outcoltypmod = -1;              /* ie, unknown */
 969         }
 970         else if (outcoltypmod != r_colvar->vartypmod)
 971         {
 972                 /* same type, but not same typmod */
 973                 outcoltypmod = -1;              /* ie, unknown */
 974         }
 975
 976         /*
 977          * Insert coercion functions if needed.  Note that a difference in typmod
 978          * can only happen if input has typmod but outcoltypmod is -1. In that
 979          * case we insert a RelabelType to clearly mark that result's typmod is
 980          * not same as input.  We never need coerce_type_typmod.
 981          */
 982         if (l_colvar->vartype != outcoltype)
 983                 l_node = coerce_type(pstate, (Node *) l_colvar, l_colvar->vartype,
 984                                                          outcoltype, outcoltypmod,
 985                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
 986         else if (l_colvar->vartypmod != outcoltypmod)
 987                 l_node = (Node *) makeRelabelType((Expr *) l_colvar,
 988                                                                                   outcoltype, outcoltypmod,
 989                                                                                   COERCE_IMPLICIT_CAST);
 990         else
 991                 l_node = (Node *) l_colvar;
 992
 993         if (r_colvar->vartype != outcoltype)
 994                 r_node = coerce_type(pstate, (Node *) r_colvar, r_colvar->vartype,
 995                                                          outcoltype, outcoltypmod,
 996                                                          COERCION_IMPLICIT, COERCE_IMPLICIT_CAST);
 997         else if (r_colvar->vartypmod != outcoltypmod)
 998                 r_node = (Node *) makeRelabelType((Expr *) r_colvar,
 999                                                                                   outcoltype, outcoltypmod,
1000                                                                                   COERCE_IMPLICIT_CAST);
1001         else
1002                 r_node = (Node *) r_colvar;
1003
1004         /*
1005          * Choose what to emit
1006          */
1007         switch (jointype)
1008         {
1009                 case JOIN_INNER:
1010
1011                         /*
1012                          * We can use either var; prefer non-coerced one if available.
1013                          */
1014                         if (IsA(l_node, Var))
1015                                 res_node = l_node;
1016                         else if (IsA(r_node, Var))
1017                                 res_node = r_node;
1018                         else
1019                                 res_node = l_node;
1020                         break;
1021                 case JOIN_LEFT:
1022                         /* Always use left var */
1023                         res_node = l_node;
1024                         break;
1025                 case JOIN_RIGHT:
1026                         /* Always use right var */
1027                         res_node = r_node;
1028                         break;
1029                 case JOIN_FULL:
1030                         {
1031                                 /*
1032                                  * Here we must build a COALESCE expression to ensure that the
1033                                  * join output is non-null if either input is.
1034                                  */
1035                                 CoalesceExpr *c = makeNode(CoalesceExpr);
1036
1037                                 c->coalescetype = outcoltype;
1038                                 c->args = list_make2(l_node, r_node);
1039                                 res_node = (Node *) c;
1040                                 break;
1041                         }
1042                 default:
1043                         elog(ERROR, "unrecognized join type: %d", (int) jointype);
1044                         res_node = NULL;        /* keep compiler quiet */
1045                         break;
1046         }
1047
1048         return res_node;
1049 }
1050
1051
1052 /*
1053  * transformWhereClause -
1054  *        Transform the qualification and make sure it is of type boolean.
1055  *        Used for WHERE and allied clauses.
1056  *
1057  * constructName does not affect the semantics, but is used in error messages
1058  */
1059 Node *
1060 transformWhereClause(ParseState *pstate, Node *clause,
1061                                          const char *constructName)
1062 {
1063         Node       *qual;
1064
1065         if (clause == NULL)
1066                 return NULL;
1067
1068         qual = transformExpr(pstate, clause);
1069
1070         qual = coerce_to_boolean(pstate, qual, constructName);
1071
1072         return qual;
1073 }
1074
1075
1076 /*
1077  * transformLimitClause -
1078  *        Transform the expression and make sure it is of type bigint.
1079  *        Used for LIMIT and allied clauses.
1080  *
1081  * Note: as of Postgres 8.2, LIMIT expressions are expected to yield int8,
1082  * rather than int4 as before.
1083  *
1084  * constructName does not affect the semantics, but is used in error messages
1085  */
1086 Node *
1087 transformLimitClause(ParseState *pstate, Node *clause,
1088                                          const char *constructName)
1089 {
1090         Node       *qual;
1091
1092         if (clause == NULL)
1093                 return NULL;
1094
1095         qual = transformExpr(pstate, clause);
1096
1097         qual = coerce_to_specific_type(pstate, qual, INT8OID, constructName);
1098
1099         /*
1100          * LIMIT can't refer to any vars or aggregates of the current query; we
1101          * don't allow subselects either (though that case would at least be
1102          * sensible)
1103          */
1104         if (contain_vars_of_level(qual, 0))
1105         {
1106                 ereport(ERROR,
1107                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1108                 /* translator: %s is name of a SQL construct, eg LIMIT */
1109                                  errmsg("argument of %s must not contain variables",
1110                                                 constructName)));
1111         }
1112         if (checkExprHasAggs(qual))
1113         {
1114                 ereport(ERROR,
1115                                 (errcode(ERRCODE_GROUPING_ERROR),
1116                 /* translator: %s is name of a SQL construct, eg LIMIT */
1117                                  errmsg("argument of %s must not contain aggregates",
1118                                                 constructName)));
1119         }
1120         if (contain_subplans(qual))
1121         {
1122                 ereport(ERROR,
1123                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1124                 /* translator: %s is name of a SQL construct, eg LIMIT */
1125                                  errmsg("argument of %s must not contain subqueries",
1126                                                 constructName)));
1127         }
1128
1129         return qual;
1130 }
1131
1132
1133 /*
1134  *      findTargetlistEntry -
1135  *        Returns the targetlist entry matching the given (untransformed) node.
1136  *        If no matching entry exists, one is created and appended to the target
1137  *        list as a "resjunk" node.
1138  *
1139  * node         the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
1140  * tlist        the target list (passed by reference so we can append to it)
1141  * clause       identifies clause type being processed
1142  */
1143 static TargetEntry *
1144 findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause)
1145 {
1146         TargetEntry *target_result = NULL;
1147         ListCell   *tl;
1148         Node       *expr;
1149
1150         /*----------
1151          * Handle two special cases as mandated by the SQL92 spec:
1152          *
1153          * 1. Bare ColumnName (no qualifier or subscripts)
1154          *        For a bare identifier, we search for a matching column name
1155          *        in the existing target list.  Multiple matches are an error
1156          *        unless they refer to identical values; for example,
1157          *        we allow      SELECT a, a FROM table ORDER BY a
1158          *        but not       SELECT a AS b, b FROM table ORDER BY b
1159          *        If no match is found, we fall through and treat the identifier
1160          *        as an expression.
1161          *        For GROUP BY, it is incorrect to match the grouping item against
1162          *        targetlist entries: according to SQL92, an identifier in GROUP BY
1163          *        is a reference to a column name exposed by FROM, not to a target
1164          *        list column.  However, many implementations (including pre-7.0
1165          *        PostgreSQL) accept this anyway.  So for GROUP BY, we look first
1166          *        to see if the identifier matches any FROM column name, and only
1167          *        try for a targetlist name if it doesn't.  This ensures that we
1168          *        adhere to the spec in the case where the name could be both.
1169          *        DISTINCT ON isn't in the standard, so we can do what we like there;
1170          *        we choose to make it work like ORDER BY, on the rather flimsy
1171          *        grounds that ordinary DISTINCT works on targetlist entries.
1172          *
1173          * 2. IntegerConstant
1174          *        This means to use the n'th item in the existing target list.
1175          *        Note that it would make no sense to order/group/distinct by an
1176          *        actual constant, so this does not create a conflict with our
1177          *        extension to order/group by an expression.
1178          *        GROUP BY column-number is not allowed by SQL92, but since
1179          *        the standard has no other behavior defined for this syntax,
1180          *        we may as well accept this common extension.
1181          *
1182          * Note that pre-existing resjunk targets must not be used in either case,
1183          * since the user didn't write them in his SELECT list.
1184          *
1185          * If neither special case applies, fall through to treat the item as
1186          * an expression.
1187          *----------
1188          */
1189         if (IsA(node, ColumnRef) &&
1190                 list_length(((ColumnRef *) node)->fields) == 1)
1191         {
1192                 char       *name = strVal(linitial(((ColumnRef *) node)->fields));
1193                 int                     location = ((ColumnRef *) node)->location;
1194
1195                 if (clause == GROUP_CLAUSE)
1196                 {
1197                         /*
1198                          * In GROUP BY, we must prefer a match against a FROM-clause
1199                          * column to one against the targetlist.  Look to see if there is
1200                          * a matching column.  If so, fall through to let transformExpr()
1201                          * do the rest.  NOTE: if name could refer ambiguously to more
1202                          * than one column name exposed by FROM, colNameToVar will
1203                          * ereport(ERROR).      That's just what we want here.
1204                          *
1205                          * Small tweak for 7.4.3: ignore matches in upper query levels.
1206                          * This effectively changes the search order for bare names to (1)
1207                          * local FROM variables, (2) local targetlist aliases, (3) outer
1208                          * FROM variables, whereas before it was (1) (3) (2). SQL92 and
1209                          * SQL99 do not allow GROUPing BY an outer reference, so this
1210                          * breaks no cases that are legal per spec, and it seems a more
1211                          * self-consistent behavior.
1212                          */
1213                         if (colNameToVar(pstate, name, true, location) != NULL)
1214                                 name = NULL;
1215                 }
1216
1217                 if (name != NULL)
1218                 {
1219                         foreach(tl, *tlist)
1220                         {
1221                                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1222
1223                                 if (!tle->resjunk &&
1224                                         strcmp(tle->resname, name) == 0)
1225                                 {
1226                                         if (target_result != NULL)
1227                                         {
1228                                                 if (!equal(target_result->expr, tle->expr))
1229                                                         ereport(ERROR,
1230                                                                         (errcode(ERRCODE_AMBIGUOUS_COLUMN),
1231
1232                                                         /*------
1233                                                           translator: first %s is name of a SQL construct, eg ORDER BY */
1234                                                                          errmsg("%s \"%s\" is ambiguous",
1235                                                                                         clauseText[clause], name),
1236                                                                          parser_errposition(pstate, location)));
1237                                         }
1238                                         else
1239                                                 target_result = tle;
1240                                         /* Stay in loop to check for ambiguity */
1241                                 }
1242                         }
1243                         if (target_result != NULL)
1244                                 return target_result;   /* return the first match */
1245                 }
1246         }
1247         if (IsA(node, A_Const))
1248         {
1249                 Value      *val = &((A_Const *) node)->val;
1250                 int                     targetlist_pos = 0;
1251                 int                     target_pos;
1252
1253                 if (!IsA(val, Integer))
1254                         ereport(ERROR,
1255                                         (errcode(ERRCODE_SYNTAX_ERROR),
1256                         /* translator: %s is name of a SQL construct, eg ORDER BY */
1257                                          errmsg("non-integer constant in %s",
1258                                                         clauseText[clause])));
1259                 target_pos = intVal(val);
1260                 foreach(tl, *tlist)
1261                 {
1262                         TargetEntry *tle = (TargetEntry *) lfirst(tl);
1263
1264                         if (!tle->resjunk)
1265                         {
1266                                 if (++targetlist_pos == target_pos)
1267                                         return tle; /* return the unique match */
1268                         }
1269                 }
1270                 ereport(ERROR,
1271                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1272                 /* translator: %s is name of a SQL construct, eg ORDER BY */
1273                                  errmsg("%s position %d is not in select list",
1274                                                 clauseText[clause], target_pos)));
1275         }
1276
1277         /*
1278          * Otherwise, we have an expression (this is a Postgres extension not
1279          * found in SQL92).  Convert the untransformed node to a transformed
1280          * expression, and search for a match in the tlist. NOTE: it doesn't
1281          * really matter whether there is more than one match.  Also, we are
1282          * willing to match a resjunk target here, though the above cases must
1283          * ignore resjunk targets.
1284          */
1285         expr = transformExpr(pstate, node);
1286
1287         foreach(tl, *tlist)
1288         {
1289                 TargetEntry *tle = (TargetEntry *) lfirst(tl);
1290
1291                 if (equal(expr, tle->expr))
1292                         return tle;
1293         }
1294
1295         /*
1296          * If no matches, construct a new target entry which is appended to the
1297          * end of the target list.      This target is given resjunk = TRUE so that it
1298          * will not be projected into the final tuple.
1299          */
1300         target_result = transformTargetEntry(pstate, node, expr, NULL, true);
1301
1302         *tlist = lappend(*tlist, target_result);
1303
1304         return target_result;
1305 }
1306
1307 static GroupClause *
1308 make_group_clause(TargetEntry *tle, List *targetlist, Oid sortop)
1309 {
1310         GroupClause *result;
1311
1312         result = makeNode(GroupClause);
1313         result->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1314         result->sortop = sortop;
1315         return result;
1316 }
1317
1318 /*
1319  * transformGroupClause -
1320  *        transform a GROUP BY clause
1321  *
1322  * GROUP BY items will be added to the targetlist (as resjunk columns)
1323  * if not already present, so the targetlist must be passed by reference.
1324  *
1325  * The order of the elements of the grouping clause does not affect
1326  * the semantics of the query. However, the optimizer is not currently
1327  * smart enough to reorder the grouping clause, so we try to do some
1328  * primitive reordering here.
1329  */
1330 List *
1331 transformGroupClause(ParseState *pstate, List *grouplist,
1332                                          List **targetlist, List *sortClause)
1333 {
1334         List       *result = NIL;
1335         List       *tle_list = NIL;
1336         ListCell   *l;
1337
1338         /* Preprocess the grouping clause, lookup TLEs */
1339         foreach(l, grouplist)
1340         {
1341                 TargetEntry *tle;
1342                 Oid                     restype;
1343
1344                 tle = findTargetlistEntry(pstate, lfirst(l),
1345                                                                   targetlist, GROUP_CLAUSE);
1346
1347                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1348                 restype = exprType((Node *) tle->expr);
1349
1350                 if (restype == UNKNOWNOID)
1351                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1352                                                                                          restype, TEXTOID, -1,
1353                                                                                          COERCION_IMPLICIT,
1354                                                                                          COERCE_IMPLICIT_CAST);
1355
1356                 tle_list = lappend(tle_list, tle);
1357         }
1358
1359         /*
1360          * Now iterate through the ORDER BY clause. If we find a grouping element
1361          * that matches the ORDER BY element, append the grouping element to the
1362          * result set immediately. Otherwise, stop iterating. The effect of this
1363          * is to look for a prefix of the ORDER BY list in the grouping clauses,
1364          * and to move that prefix to the front of the GROUP BY.
1365          */
1366         foreach(l, sortClause)
1367         {
1368                 SortClause *sc = (SortClause *) lfirst(l);
1369                 ListCell   *prev = NULL;
1370                 ListCell   *tl;
1371                 bool            found = false;
1372
1373                 foreach(tl, tle_list)
1374                 {
1375                         TargetEntry *tle = (TargetEntry *) lfirst(tl);
1376
1377                         if (sc->tleSortGroupRef == tle->ressortgroupref)
1378                         {
1379                                 GroupClause *gc;
1380
1381                                 tle_list = list_delete_cell(tle_list, tl, prev);
1382
1383                                 /* Use the sort clause's sorting operator */
1384                                 gc = make_group_clause(tle, *targetlist, sc->sortop);
1385                                 result = lappend(result, gc);
1386                                 found = true;
1387                                 break;
1388                         }
1389
1390                         prev = tl;
1391                 }
1392
1393                 /* As soon as we've failed to match an ORDER BY element, stop */
1394                 if (!found)
1395                         break;
1396         }
1397
1398         /*
1399          * Now add any remaining elements of the GROUP BY list in the order we
1400          * received them.
1401          *
1402          * XXX: are there any additional criteria to consider when ordering
1403          * grouping clauses?
1404          */
1405         foreach(l, tle_list)
1406         {
1407                 TargetEntry *tle = (TargetEntry *) lfirst(l);
1408                 GroupClause *gc;
1409                 Oid                     sort_op;
1410
1411                 /* avoid making duplicate grouplist entries */
1412                 if (targetIsInSortList(tle, result))
1413                         continue;
1414
1415                 sort_op = ordering_oper_opid(exprType((Node *) tle->expr));
1416                 gc = make_group_clause(tle, *targetlist, sort_op);
1417                 result = lappend(result, gc);
1418         }
1419
1420         list_free(tle_list);
1421         return result;
1422 }
1423
1424 /*
1425  * transformSortClause -
1426  *        transform an ORDER BY clause
1427  *
1428  * ORDER BY items will be added to the targetlist (as resjunk columns)
1429  * if not already present, so the targetlist must be passed by reference.
1430  */
1431 List *
1432 transformSortClause(ParseState *pstate,
1433                                         List *orderlist,
1434                                         List **targetlist,
1435                                         bool resolveUnknown)
1436 {
1437         List       *sortlist = NIL;
1438         ListCell   *olitem;
1439
1440         foreach(olitem, orderlist)
1441         {
1442                 SortBy     *sortby = lfirst(olitem);
1443                 TargetEntry *tle;
1444
1445                 tle = findTargetlistEntry(pstate, sortby->node,
1446                                                                   targetlist, ORDER_CLAUSE);
1447
1448                 sortlist = addTargetToSortList(pstate, tle,
1449                                                                            sortlist, *targetlist,
1450                                                                            sortby->sortby_kind,
1451                                                                            sortby->useOp,
1452                                                                            resolveUnknown);
1453         }
1454
1455         return sortlist;
1456 }
1457
1458 /*
1459  * transformDistinctClause -
1460  *        transform a DISTINCT or DISTINCT ON clause
1461  *
1462  * Since we may need to add items to the query's sortClause list, that list
1463  * is passed by reference.      Likewise for the targetlist.
1464  */
1465 List *
1466 transformDistinctClause(ParseState *pstate, List *distinctlist,
1467                                                 List **targetlist, List **sortClause)
1468 {
1469         List       *result = NIL;
1470         ListCell   *slitem;
1471         ListCell   *dlitem;
1472
1473         /* No work if there was no DISTINCT clause */
1474         if (distinctlist == NIL)
1475                 return NIL;
1476
1477         if (linitial(distinctlist) == NULL)
1478         {
1479                 /* We had SELECT DISTINCT */
1480
1481                 /*
1482                  * All non-resjunk elements from target list that are not already in
1483                  * the sort list should be added to it.  (We don't really care what
1484                  * order the DISTINCT fields are checked in, so we can leave the
1485                  * user's ORDER BY spec alone, and just add additional sort keys to it
1486                  * to ensure that all targetlist items get sorted.)
1487                  */
1488                 *sortClause = addAllTargetsToSortList(pstate,
1489                                                                                           *sortClause,
1490                                                                                           *targetlist,
1491                                                                                           true);
1492
1493                 /*
1494                  * Now, DISTINCT list consists of all non-resjunk sortlist items.
1495                  * Actually, all the sortlist items had better be non-resjunk!
1496                  * Otherwise, user wrote SELECT DISTINCT with an ORDER BY item that
1497                  * does not appear anywhere in the SELECT targetlist, and we can't
1498                  * implement that with only one sorting pass...
1499                  */
1500                 foreach(slitem, *sortClause)
1501                 {
1502                         SortClause *scl = (SortClause *) lfirst(slitem);
1503                         TargetEntry *tle = get_sortgroupclause_tle(scl, *targetlist);
1504
1505                         if (tle->resjunk)
1506                                 ereport(ERROR,
1507                                                 (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1508                                                  errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list")));
1509                         else
1510                                 result = lappend(result, copyObject(scl));
1511                 }
1512         }
1513         else
1514         {
1515                 /* We had SELECT DISTINCT ON (expr, ...) */
1516
1517                 /*
1518                  * If the user writes both DISTINCT ON and ORDER BY, then the two
1519                  * expression lists must match (until one or the other runs out).
1520                  * Otherwise the ORDER BY requires a different sort order than the
1521                  * DISTINCT does, and we can't implement that with only one sort pass
1522                  * (and if we do two passes, the results will be rather
1523                  * unpredictable). However, it's OK to have more DISTINCT ON
1524                  * expressions than ORDER BY expressions; we can just add the extra
1525                  * DISTINCT values to the sort list, much as we did above for ordinary
1526                  * DISTINCT fields.
1527                  *
1528                  * Actually, it'd be OK for the common prefixes of the two lists to
1529                  * match in any order, but implementing that check seems like more
1530                  * trouble than it's worth.
1531                  */
1532                 ListCell   *nextsortlist = list_head(*sortClause);
1533
1534                 foreach(dlitem, distinctlist)
1535                 {
1536                         TargetEntry *tle;
1537
1538                         tle = findTargetlistEntry(pstate, lfirst(dlitem),
1539                                                                           targetlist, DISTINCT_ON_CLAUSE);
1540
1541                         if (nextsortlist != NULL)
1542                         {
1543                                 SortClause *scl = (SortClause *) lfirst(nextsortlist);
1544
1545                                 if (tle->ressortgroupref != scl->tleSortGroupRef)
1546                                         ereport(ERROR,
1547                                                         (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1548                                                          errmsg("SELECT DISTINCT ON expressions must match initial ORDER BY expressions")));
1549                                 result = lappend(result, copyObject(scl));
1550                                 nextsortlist = lnext(nextsortlist);
1551                         }
1552                         else
1553                         {
1554                                 *sortClause = addTargetToSortList(pstate, tle,
1555                                                                                                   *sortClause, *targetlist,
1556                                                                                                   SORTBY_ASC, NIL, true);
1557
1558                                 /*
1559                                  * Probably, the tle should always have been added at the end
1560                                  * of the sort list ... but search to be safe.
1561                                  */
1562                                 foreach(slitem, *sortClause)
1563                                 {
1564                                         SortClause *scl = (SortClause *) lfirst(slitem);
1565
1566                                         if (tle->ressortgroupref == scl->tleSortGroupRef)
1567                                         {
1568                                                 result = lappend(result, copyObject(scl));
1569                                                 break;
1570                                         }
1571                                 }
1572                                 if (slitem == NULL)             /* should not happen */
1573                                         elog(ERROR, "failed to add DISTINCT ON clause to target list");
1574                         }
1575                 }
1576         }
1577
1578         return result;
1579 }
1580
1581 /*
1582  * addAllTargetsToSortList
1583  *              Make sure all non-resjunk targets in the targetlist are in the
1584  *              ORDER BY list, adding the not-yet-sorted ones to the end of the list.
1585  *              This is typically used to help implement SELECT DISTINCT.
1586  *
1587  * See addTargetToSortList for info about pstate and resolveUnknown inputs.
1588  *
1589  * Returns the updated ORDER BY list.
1590  */
1591 List *
1592 addAllTargetsToSortList(ParseState *pstate, List *sortlist,
1593                                                 List *targetlist, bool resolveUnknown)
1594 {
1595         ListCell   *l;
1596
1597         foreach(l, targetlist)
1598         {
1599                 TargetEntry *tle = (TargetEntry *) lfirst(l);
1600
1601                 if (!tle->resjunk)
1602                         sortlist = addTargetToSortList(pstate, tle,
1603                                                                                    sortlist, targetlist,
1604                                                                                    SORTBY_ASC, NIL,
1605                                                                                    resolveUnknown);
1606         }
1607         return sortlist;
1608 }
1609
1610 /*
1611  * addTargetToSortList
1612  *              If the given targetlist entry isn't already in the ORDER BY list,
1613  *              add it to the end of the list, using the sortop with given name
1614  *              or the default sort operator if opname == NIL.
1615  *
1616  * If resolveUnknown is TRUE, convert TLEs of type UNKNOWN to TEXT.  If not,
1617  * do nothing (which implies the search for a sort operator will fail).
1618  * pstate should be provided if resolveUnknown is TRUE, but can be NULL
1619  * otherwise.
1620  *
1621  * Returns the updated ORDER BY list.
1622  */
1623 List *
1624 addTargetToSortList(ParseState *pstate, TargetEntry *tle,
1625                                         List *sortlist, List *targetlist,
1626                                         int sortby_kind, List *sortby_opname,
1627                                         bool resolveUnknown)
1628 {
1629         /* avoid making duplicate sortlist entries */
1630         if (!targetIsInSortList(tle, sortlist))
1631         {
1632                 SortClause *sortcl = makeNode(SortClause);
1633                 Oid                     restype = exprType((Node *) tle->expr);
1634
1635                 /* if tlist item is an UNKNOWN literal, change it to TEXT */
1636                 if (restype == UNKNOWNOID && resolveUnknown)
1637                 {
1638                         tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
1639                                                                                          restype, TEXTOID, -1,
1640                                                                                          COERCION_IMPLICIT,
1641                                                                                          COERCE_IMPLICIT_CAST);
1642                         restype = TEXTOID;
1643                 }
1644
1645                 sortcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
1646
1647                 switch (sortby_kind)
1648                 {
1649                         case SORTBY_ASC:
1650                                 sortcl->sortop = ordering_oper_opid(restype);
1651                                 break;
1652                         case SORTBY_DESC:
1653                                 sortcl->sortop = reverse_ordering_oper_opid(restype);
1654                                 break;
1655                         case SORTBY_USING:
1656                                 Assert(sortby_opname != NIL);
1657                                 sortcl->sortop = compatible_oper_opid(sortby_opname,
1658                                                                                                           restype,
1659                                                                                                           restype,
1660                                                                                                           false);
1661                                 break;
1662                         default:
1663                                 elog(ERROR, "unrecognized sortby_kind: %d", sortby_kind);
1664                                 break;
1665                 }
1666
1667                 sortlist = lappend(sortlist, sortcl);
1668         }
1669         return sortlist;
1670 }
1671
1672 /*
1673  * assignSortGroupRef
1674  *        Assign the targetentry an unused ressortgroupref, if it doesn't
1675  *        already have one.  Return the assigned or pre-existing refnumber.
1676  *
1677  * 'tlist' is the targetlist containing (or to contain) the given targetentry.
1678  */
1679 Index
1680 assignSortGroupRef(TargetEntry *tle, List *tlist)
1681 {
1682         Index           maxRef;
1683         ListCell   *l;
1684
1685         if (tle->ressortgroupref)       /* already has one? */
1686                 return tle->ressortgroupref;
1687
1688         /* easiest way to pick an unused refnumber: max used + 1 */
1689         maxRef = 0;
1690         foreach(l, tlist)
1691         {
1692                 Index           ref = ((TargetEntry *) lfirst(l))->ressortgroupref;
1693
1694                 if (ref > maxRef)
1695                         maxRef = ref;
1696         }
1697         tle->ressortgroupref = maxRef + 1;
1698         return tle->ressortgroupref;
1699 }
1700
1701 /*
1702  * targetIsInSortList
1703  *              Is the given target item already in the sortlist?
1704  *
1705  * Works for both SortClause and GroupClause lists.  Note that the main
1706  * reason we need this routine (and not just a quick test for nonzeroness
1707  * of ressortgroupref) is that a TLE might be in only one of the lists.
1708  */
1709 bool
1710 targetIsInSortList(TargetEntry *tle, List *sortList)
1711 {
1712         Index           ref = tle->ressortgroupref;
1713         ListCell   *l;
1714
1715         /* no need to scan list if tle has no marker */
1716         if (ref == 0)
1717                 return false;
1718
1719         foreach(l, sortList)
1720         {
1721                 SortClause *scl = (SortClause *) lfirst(l);
1722
1723                 if (scl->tleSortGroupRef == ref)
1724                         return true;
1725         }
1726         return false;
1727 }